#!/usr/bin/perl
# get_journal_info.pl
# Get Journal Info for ulrichs
# Hekman Library 2007
# Chris Hirt
# 2007-08-06
#
# cgi parameters:
#   issn - (with or without dash)
# returns:
#   pipe-delimited ulrichs information, in the field order
#     issn|serial_uid|frequency|refereed|indexed|type
#   or a single '-' when the issn parameter is missing/invalid or no
#   serial_uid could be found on the search results page
#
# NOTE: I found this URL query after the fact that may be a better way to
# implement the Ulrichs scrape:
#   http://www.ulrichsweb.com/ulrichsweb/Search/vendor_redirect.asp?oVendor=SFX&oIssn=[issn]
# this is not implemented currently though

use warnings;
use strict;
use CGI;
use LWP::Simple;    # loaded by the original script; pages are fetched with wget below

#### set this to 0 to turn off using the cache; or 1 to use the cache
my $useCache = 1;
####

#################################################
# path to a safe place to store the cached version of ulrich's information
my $textdb = "/path/to/a/safe/place/journal_info.txt";
#################################################

# CGI header
my $q = CGI->new;

# CGI parameters
my $issn = $q->param('issn');

# validate issn param (print_result never returns: it exits the script)
if (not defined $issn) {
    print_result('-');
}
$issn =~ s/^\s+//;    # remove leading spaces
$issn =~ s/\s+$//;    # remove trailing spaces
if ($issn !~ /\A(\d{4})-?(\d{3}[\dX])\z/) {
    print_result('-');
}
$issn = "$1-$2";
my $issn_compact = "$1$2";

# setup variables
my $serial_uid = '';
my $refereed   = 0;    # refereed journal (0=false / 1=true)
my $indexed    = 0;    # indexed journal (0=false / 1=true)
my $frequency  = 0;    # journal frequency (0=unknown / text=Monthly|Yearly|Weekly|other)
my $type       = 0;    # document type (0=unknown / text=Academic/Scholarly|other)

# return cached version if it exists
if ($useCache) {
    my $cached_string = lookup_cached($issn_compact);
    if (defined $cached_string) {
        print_result($cached_string);
    }
}

# step 1: quick search by ISSN to discover the serial_uid
my $results_page = fetch_page(
    "http://www.ulrichsweb.com/ulrichsweb/Search/doAdvancedSearch.asp?Action=Search&collection=SERIAL&ResultTemplate=quickSearchResults.hts&ResultCount=1&QuickCriteria=ISSN&QuickCriteriaText=$issn"
);

# The issn capture allows a trailing X check digit; capturing digits only
# would truncate such an ISSN and the cache key would never match again.
if ($results_page =~ /\&serial_uid=(\d+)\&issn=(\d+[Xx]?)/) {
    $serial_uid   = $1;
    $issn_compact = uc $2;
}
if ($results_page =~ m{/ulrichsweb/images/searchResults/Refereed\.gif}) {
    $refereed = 1;
}

# without a serial_uid there is no citation page to fetch
if (not $serial_uid) {
    print_result("-");
}

# step 2: full citation page for the remaining fields
$results_page = fetch_page(
    "http://www.ulrichsweb.com/ulrichsweb/Search/fullCitation.asp?navPage=1&tab=1&serial_uid=$serial_uid&issn=$issn_compact"
);

if ($results_page =~ /Abstracted\/Indexed:/) {
    $indexed = 1;
}
# for both fields the value is taken from the line following the label line
if ($results_page =~ m@Frequency:[^\n]+\n([^\n]+)@s) {
    $frequency = $1;
}
if ($results_page =~ m@Document Type:[^\n]+\n([^\n]+)@s) {
    $type = $1;
    $type =~ s/; / -- /;
    $type =~ s/^Government$/Government Document/;    # special rule
}

my $record = join '|', $issn_compact, $serial_uid, $frequency, $refereed, $indexed, $type;
if ($useCache) {
    cache_and_print($record);
}
else {
    print_result($record);
}
exit;

# Print the text/plain CGI header followed by $result, then exit the script.
sub print_result {
    my ($result) = @_;
    print $q->header('text/plain');
    print $result;
    exit;
}

# Append $result as one line to the cache file, then print it (and exit).
# The file is written directly rather than through a shell `echo`, because
# $result contains text scraped from a remote page and must never be
# interpreted by a shell. A failed cache write is only warned about; the
# result is still returned to the client.
sub cache_and_print {
    my ($result) = @_;
    if (open my $cache_fh, '>>', $textdb) {
        print {$cache_fh} "$result\n";
        close $cache_fh
            or warn "get_journal_info: error closing $textdb: $!\n";
    }
    else {
        warn "get_journal_info: cannot append to $textdb: $!\n";
    }
    print_result($result);
}

# Return the first cached record whose first field equals $key, without its
# trailing newline, or undef when there is no such record or the cache file
# cannot be read. Returning a single chomped line keeps cached responses in
# the same shape as freshly scraped ones.
sub lookup_cached {
    my ($key) = @_;
    open my $cache_fh, '<', $textdb or return;
    my $hit;
    while (my $line = <$cache_fh>) {
        next if index($line, "$key|") != 0;
        chomp $line;
        $hit = $line;
        last;
    }
    close $cache_fh;
    return $hit;
}

# Fetch $url with wget and return the response body, or '' on failure.
# wget is started with a list-form pipe open, so no shell is involved.
sub fetch_page {
    my ($url) = @_;
    open my $wget, '-|', 'wget', '-O-', $url or return '';
    my $body = do { local $/; <$wget> };
    close $wget;
    return defined $body ? $body : '';
}