#!/usr/bin/perl
#
# Bulk-submit WormBase papers to citeulike.
#
# Fetches every Paper object from the ACeDB server, loads the citeulike
# "posturl" form for each WBPaper, adds tags and submits the form.  Papers
# whose submission returned an HTTP success are appended to
# status-uploaded.txt so that a later run skips them; failures are written to
# status-errors.txt (which is truncated at the start of every run).
#
# The citeulike password is taken from the first command-line argument.

use strict;
use warnings;
use Ace;
use WWW::Mechanize;
use IO::Handle;
# NOTE(review): md5_hex is imported but never called in this file; the token
# citeulike expects is produced by the generated JavaScript (see dump_js).
use Digest::MD5 qw/md5_hex/;

my $password = shift or die "Usage $0 [password]\n";
chomp $password;

# The cross-referenced URL to submit to citeulike.
my $url = 'http://www.citeulike.org/posturl?url=http://www.wormbase.org/db/misc/paper?name=%s';

# Connect to our aceserver and get all the papers.
my $db = Ace->connect(-host => 'aceserver.cshl.org', -port => 2005)
    or die "Could not connect to aceserver.cshl.org:2005: ", Ace->error, "\n";
my @papers = $db->fetch(Paper => '*');

# A new mech object with in-memory cookies.  autocheck is switched off so that
# an HTTP error on one paper is recorded below instead of aborting the run.
my $mech = WWW::Mechanize->new(
    cookie_jar => {},
    autocheck  => 0,
);

my %complete = get_complete();

# Login to citeulike.
$mech->get("http://www.citeulike.org/login?from=%2f");
$mech->success
    or die "Could not load the citeulike login page: ",
           $mech->response->status_line, "\n";
$mech->submit_form(
    form_name => 'frm',
    fields    => {
        username => 'tharris',
        password => $password,
    },
);
$mech->success
    or die "citeulike login request failed: ",
           $mech->response->status_line, "\n";

open my $err_fh, '>', 'status-errors.txt'
    or die "Cannot write status-errors.txt: $!\n";
open my $complete_fh, '>>', 'status-uploaded.txt'
    or die "Cannot append to status-uploaded.txt: $!\n";

# Flush after every record: the uploaded list is what lets an interrupted run
# resume without re-posting papers, so it must not sit in a buffer.
$_->autoflush(1) for $err_fh, $complete_fh;

# Progress lines overwrite each other on a terminal and stack up otherwise.
my $eol = (-t STDOUT && !$ENV{EMACS}) ? "\r" : "\n";

my $total = scalar @papers;
my $c     = 0;

PAPER:
foreach my $paper (@papers) {
    next PAPER unless $paper =~ /^WBPaper/;
    $c++;

    if ($complete{ $paper->name }) {
        print STDERR "Already complete: $paper. Skipping...", $eol;
        next PAPER;
    }

    # Pause between requests (presumably to avoid hammering citeulike).
    sleep(1);

    # Get the citeulike submit form for each paper.
    my $post_url = sprintf($url, $paper);
    $mech->get($post_url);
    unless ($mech->success) {
        print {$err_fh} "$paper: ", $mech->response->status_line, "\n";
        next PAPER;
    }

    # Add some tags
    my @tags = qw/WormBase caenorhabditis_elegans celegans c_elegans elegans nematode/;

    # Keyword lookup is wrapped in eval so a failing lookup just yields no
    # keyword tags.
    my @keywords = eval { $paper->Keyword };
    foreach my $key (@keywords) {
        $key =~ s/ /_/g;
        $key =~ s/\//-/g;
        push @tags, lc($key);
    }

    # Genes: only tagged when the paper references fewer than ten of them.
    my @genes = $paper->Gene;
    if (@genes < 10) {
        foreach my $gene (@genes) {
            push @tags, $gene->CGC_name if $gene->CGC_name;

            # Add the sequence name only when it differs from the public name.
            my $public = $gene->Public_name;
            push @tags, $gene->Sequence_name
                if $gene->Sequence_name
                && (!defined $public || $gene->Sequence_name ne $public);
        }
    }

    # A paper without a Type must not contribute an empty tag.
    my $type = $paper->Type;
    push @tags, lc($type) if defined $type && length $type;
    # print join("; ",@tags) . "\n";

    my $content = $mech->content;

    # Make sure the paper has sufficient data for citeulike to parse.
    unless ($mech->form_name('frm')) {
        print {$err_fh} "$paper: insufficient data\n";
        next PAPER;
    }
    # print $content;

    # Get the article ID stored in a hidden field.
    # Formatting seems to vary, so two attribute orders are tried.  Captures
    # are taken from the match's return list so that a failed match can never
    # hand back a stale $1 from an earlier pattern.
    my ($article_id) =
        $content =~ /input type="hidden" value="([^"]*)" name="article_id"/;
    unless ($article_id) {
        my ($alternate) =
            $content =~ /input type="hidden" name="article_id" value="(\d*)"/;
        $article_id = $alternate if defined $alternate;
    }

    # Get the unique hidden name and value.
    my ($key_value, $to_hex) =
        $content =~ /document\.frm\.([\w\$]+)\.value = hex_md5\('([^']*)'\);/;
    unless (defined $key_value && defined $to_hex) {
        print {$err_fh} "$paper: could not find the hex_md5 token on the form\n";
        next PAPER;
    }

    # Dump out the hexing js. Awkward.
    dump_js($to_hex);

    # Execute the js and retrieve the hexed value.
    my $hex = execute_js();
    unless (defined $hex && length $hex) {
        print {$err_fh} "$paper: could not compute the hex_md5 token\n";
        next PAPER;
    }

    # Submit the form with appropriate fields.
    # wname => '',
    my $response = $mech->submit_form(
        form_name => 'frm',
        fields    => {
            tags           => join(" ", @tags),
            post_action    => 'new',
            article_id     => $article_id,
            url            => $post_url,
            src_username   => '',
            $key_value     => $hex,
            to_own_library => 'on',
            to_group_6190  => 'on',
            to_read        => '3',
        },
    );

    if ($response->is_success) {
        print {$complete_fh} "$paper\n";
    }
    else {
        print {$err_fh} "$paper\n";
    }

    my $shown_id = defined $article_id ? $article_id : '';
    print STDERR "$c of $total: $paper $shown_id $key_value $hex; "
        . $response->is_success, $eol;
}

# Buffered write errors surface at close, so check both status files.
close $complete_fh or die "Error closing status-uploaded.txt: $!\n";
close $err_fh      or die "Error closing status-errors.txt: $!\n";

# Some of the hidden fields on the citeulike new submission form.
# (The field listing that followed this comment is not present in this copy
# of the file; the fields this script sets are the ones passed to submit_form
# in the loop above.)

# Dump out a javascript file suitable for generating the hexed ID.
#
# Takes the string to hash, writes get_hex.js in the current directory with a
# final hex_md5("...") call on that string, and returns nothing.  Dies if the
# file cannot be written.  The handle is closed before returning so the file
# is complete on disk when the external interpreter reads it.
sub dump_js {
    my $id = shift;
    $id = '' unless defined $id;

    # The value comes from a remote page and is placed inside a double-quoted
    # JavaScript string literal, so escape backslashes and double quotes.
    $id =~ s/([\\"])/\\$1/g;

    open my $js, '>', 'get_hex.js'
        or die "Cannot write get_hex.js: $!\n";

    # FIXME(review): the JavaScript payload below is incomplete in this copy
    # of the file.  Several stretches are missing -- the text begins
    # mid-expression at ">5]|=0x80", and loops such as "for(var i=0;i>16)" are
    # cut short -- so the generated get_hex.js cannot run as written.  The
    # surviving text is reproduced unchanged (only line breaks were added) and
    # the "<<END" opener is reinstated to pair with the END terminator.
    # Restore the full hashing script from its original source before use.
    print {$js} <<END;
>5]|=0x80<<(24-len%32);
x[((len+64>>9)<<4)+15]=len;
var w=Array(80);
var a=1732584193;
var b=-271733879;
var c=-1732584194;
var d=271733878;
var e=-1009589776;
for(var i=0;i>16)+(y>>16)+(lsw>>16);
return(msw<<16)|(lsw&0xFFFF);
}
function rol(num,cnt){ return(num<>>(32-cnt)); }
function str2binb(str){ var bin=Array(); var mask=(1<>5]|=(str.charCodeAt(i/md5size)&mask)<<(32-md5size-i%32); return bin; }
function binb2str(bin){ var str=""; var mask=(1<>5]>>>(32-md5size-i%32))&mask); return str; }
function binb2hex(binarray){ var hex_tab="0123456789ABCDEF"; var str=""; for(var i=0;i>2]>>((3-i%4)*8+4)))&0xF)+hex_tab.charAt((0x7*(binarray[i>>2]>>((3-i%4)*8)))&0xF); } print(str); return str; }
hex_md5("$id");
END

    close $js or die "Error closing get_hex.js: $!\n";
    return;
}

# Execute the javascript.
# Runs get_hex.js through the bundled SpiderMonkey shell and returns whatever
# the script printed, with the trailing newline removed.  get_hex.js is
# deleted afterwards whether or not the interpreter succeeded.  Returns undef
# when the interpreter could not be started.
sub execute_js {
    my $hex    = `./spidermonkey/src/Darwin_DBG.OBJ/js get_hex.js`;
    my $status = $?;

    # Remove the temporary script directly instead of shelling out to rm.
    unlink 'get_hex.js';

    warn "js interpreter failed on get_hex.js (status $status)\n"
        if $status != 0;

    chomp $hex if defined $hex;
    return $hex;
}

# Loads the list of papers that have already been uploaded.
#
# Reads status-uploaded.txt from the current directory, one paper name per
# line, and returns a hash keyed by name whose value is the number of times
# that name appears.  A missing file (e.g. on the first run) yields an empty
# hash; any other failure to open the file is fatal, because continuing would
# re-submit every paper.
sub get_complete {
    my %complete;
    my $file = 'status-uploaded.txt';

    open my $in, '<', $file or do {
        return %complete if $!{ENOENT};
        die "Cannot read $file: $!\n";
    };

    # Read line by line from the handle (the loop previously had no read
    # expression at all, so the file was never consumed).
    while (my $line = <$in>) {
        chomp $line;
        $complete{$line}++;
    }
    close $in;

    return %complete;
}