#!/usr/bin/perl
use strict;
use Ace;
use WWW::Mechanize;
use Digest::MD5 qw/md5_hex/;
# ---------------------------------------------------------------------------
# Main driver.
#
# Fetches every Paper object from the WormBase acedb server, logs in to
# citeulike and posts each WBPaper that is not already listed in
# status-uploaded.txt. Each posting is tagged with a fixed set of organism
# tags plus the paper's keywords, gene names and type.
#
# Usage: <script> [password]
#
# Files written in the current directory:
#   status-uploaded.txt  appended with the ID of every paper posted
#                        successfully (read back by get_complete() so an
#                        interrupted run can be resumed)
#   status-errors.txt    rewritten on each run with the papers that failed
# ---------------------------------------------------------------------------
my $password = shift or die "Usage $0 [password]\n";
chomp $password;

# The cross-referenced URL to submit to citeulike.
my $url = 'http://www.citeulike.org/posturl?url=http://www.wormbase.org/db/misc/paper?name=%s';

# Connect to our aceserver and get all the papers. connect() returns a false
# value on failure, so stop here with the server's message rather than dying
# later on an undefined $db.
my $db = Ace->connect(-host=>'aceserver.cshl.org',-port=>2005)
    or die "Connection failure: ", Ace->error, "\n";
my @papers = $db->fetch(Paper=>'*');

# A new mech object with in-memory cookies.
my $mech = WWW::Mechanize->new(cookie_jar => {});

my %complete = get_complete();

# Login to citeulike.
$mech->get("http://www.citeulike.org/login?from=%2f");
$mech->submit_form(
    form_name => 'frm',
    fields    => {
        username => 'tharris',
        password => $password,
    },
);
die "Login to citeulike failed: ", $mech->status, "\n" unless $mech->success;

# Status logs. Lexical handles, three-argument open, and a hard failure if
# either file cannot be opened: without them the run could not be resumed.
open my $err_fh, '>', 'status-errors.txt'
    or die "Cannot write status-errors.txt: $!\n";
open my $done_fh, '>>', 'status-uploaded.txt'
    or die "Cannot append to status-uploaded.txt: $!\n";

# Unbuffer both logs so that progress already made survives a crash.
for my $fh ($err_fh, $done_fh) {
    my $previous = select $fh;
    $| = 1;
    select $previous;
}

# Progress lines overwrite each other on a terminal and are newline-separated
# otherwise. The test is made on STDERR because that is where they are sent.
my $eol = ((-t STDERR) && !$ENV{EMACS}) ? "\r" : "\n";

my $total = scalar @papers;
my $c     = 0;

PAPER:
foreach my $paper (@papers) {
    next PAPER unless $paper =~ /^WBPaper/;
    $c++;

    if ($complete{$paper->name}) {
        print STDERR "Already complete: $paper. Skipping...", $eol;
        next PAPER;
    }

    sleep(1);

    # Get the citeulike submit form for each paper. WWW::Mechanize may die on
    # an HTTP error (autocheck), so trap it and move on to the next paper
    # instead of aborting the whole batch.
    my $submit_url = sprintf($url, $paper);
    unless (eval { $mech->get($submit_url); 1 }) {
        print {$err_fh} "$paper: fetch failed\n";
        next PAPER;
    }

    # Add some tags
    my @tags = qw/WormBase caenorhabditis_elegans celegans c_elegans elegans nematode/;

    # Keywords: spaces become underscores and slashes become dashes so each
    # keyword stays a single space-delimited tag.
    my @keywords = eval { $paper->Keyword };
    foreach my $keyword (@keywords) {
        my $tag = lc "$keyword";
        $tag =~ tr{ /}{_-};
        push @tags, $tag;
    }

    # Genes (only when the paper lists fewer than ten of them).
    my @genes = $paper->Gene;
    if (@genes < 10) {
        foreach my $gene (@genes) {
            push @tags, $gene->CGC_name if $gene->CGC_name;
            push @tags, $gene->Sequence_name
                if $gene->Sequence_name && $gene->Sequence_name ne $gene->Public_name;
        }
    }

    # Paper type; skipped when absent so no empty tag is added.
    my $type = $paper->Type;
    push @tags, lc $type if defined $type;

    # Make sure the paper has sufficient data for citeulike to parse.
    unless ($mech->form_name('frm')) {
        print {$err_fh} "$paper: insufficient data\n";
        next PAPER;
    }

    my $content = $mech->content;

    # Get the article ID stored in a hidden field. Formatting seems to vary,
    # so try both attribute orders. Captures are only read after a successful
    # match so a stale $1 can never leak in.
    my $article_id;
    if ($content =~ /input type="hidden" value="(.*)" name="article_id"/) {
        $article_id = $1;
    }
    if (!$article_id
        && $content =~ /input type="hidden" name="article_id" value="(\d*)"/) {
        $article_id = $1;
    }

    # Get the unique hidden field name and the string the page hashes into it.
    my ($key_value, $to_hex) =
        $content =~ /document\.frm\.(.*)\.value = hex_md5\('(.*)'\);/;
    unless (defined $key_value && length $key_value) {
        print {$err_fh} "$paper: could not find the hashed form field\n";
        next PAPER;
    }

    # Dump out the hexing js, execute it and retrieve the hexed value. Awkward.
    dump_js($to_hex);
    my $hex = execute_js();
    unless (defined $hex && length $hex) {
        print {$err_fh} "$paper: could not compute the form hash\n";
        next PAPER;
    }

    # Submit the form with appropriate fields. A died submit_form leaves
    # $response undefined and is recorded as a failure.
    my $response = eval {
        $mech->submit_form(
            form_name => 'frm',
            fields    => {
                tags           => join(" ", @tags),
                post_action    => 'new',
                article_id     => $article_id,
                url            => $submit_url,
                src_username   => '',
                $key_value     => $hex,
                to_own_library => 'on',
                to_group_6190  => 'on',
                to_read        => '3',
            },
        );
    };
    my $ok = ($response && $response->is_success) ? 1 : '';

    if ($ok) {
        print {$done_fh} "$paper\n";
    }
    else {
        print {$err_fh} "$paper\n";
    }

    print STDERR "$c of $total: $paper $article_id $key_value $hex; $ok", $eol;
}

close $done_fh or warn "Error closing status-uploaded.txt: $!\n";
close $err_fh  or warn "Error closing status-errors.txt: $!\n";
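# Some of the hidden fields on the citeulike new submission form.
# (The example markup that originally followed this heading is missing from
# this copy of the script. The main loop above reads the hidden article_id
# input and the field assigned via document.frm.<name>.value = hex_md5(...).)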
# Dump out a javascript file suitable for generating the hexed ID.
# dump_js($id)
#
# Writes a JavaScript program to get_hex.js in the current directory. The
# program ends with a call to hex_md5("$id"), and its binb2hex() helper
# print()s the resulting hex string, which is what execute_js() captures.
#
# Takes:   $id - the string to be hashed; it is placed inside a double-quoted
#                JavaScript literal without any escaping.
# Returns: nothing meaningful.
#
# NOTE(review): the heredoc opener and the first part of the JavaScript
# (including the hex_md5 definition) appear to be missing from this copy of
# the file - restore them from a good copy before relying on this sub.
# NOTE(review): the constants and the 80-entry work array look like SHA-1
# rather than MD5 despite the hex_md5 name - confirm against the site's
# script before swapping in Digest::MD5.
sub dump_js {
my $id = shift;
# Two-argument open on a bareword handle; the result is not checked.
open JS,">get_hex.js";
# NOTE(review): no close on JS is visible in this sub, so the script may
# still be sitting in the output buffer when the interpreter runs - confirm.
print JS <>5]|=0x80<<(24-len%32);
x[((len+64>>9)<<4)+15]=len;
var w=Array(80);
var a=1732584193;
var b=-271733879;
var c=-1732584194;
var d=271733878;
var e=-1009589776;
for(var i=0;i>16)+(y>>16)+(lsw>>16);
return(msw<<16)|(lsw&0xFFFF);
}
function rol(num,cnt){
return(num<>>(32-cnt));
}
function str2binb(str){
var bin=Array();
var mask=(1<>5]|=(str.charCodeAt(i/md5size)&mask)<<(32-md5size-i%32);
return bin;
}
function binb2str(bin){
var str="";
var mask=(1<>5]>>>(32-md5size-i%32))&mask);
return str;
}
function binb2hex(binarray){
var hex_tab="0123456789ABCDEF";
var str="";
for(var i=0;i>2]>>((3-i%4)*8+4)))&0xF)+hex_tab.charAt((0x7*(binarray[i>>2]>>((3-i%4)*8)))&0xF);
}
print(str);
return str;
}
hex_md5("$id");
END
;
}
# Execute the javascript.
# execute_js()
#
# Runs get_hex.js (written by dump_js) through the bundled SpiderMonkey
# shell, removes the script file and returns whatever the script printed.
#
# Returns: the captured output with its trailing newline removed, or the
#          empty string when the interpreter produced nothing or could not
#          be started. A warning is issued when the interpreter exits with a
#          non-zero status.
sub execute_js {
    my $hex    = `./spidermonkey/src/Darwin_DBG.OBJ/js get_hex.js`;
    my $status = $?;

    # Remove the temporary script directly instead of spawning a shell for
    # "rm -rf"; a file that is already gone is not an error here.
    unlink 'get_hex.js';

    warn "JavaScript interpreter failed (status $status)\n" if $status != 0;

    # Backticks yield undef when the command could not be run at all.
    $hex = '' unless defined $hex;
    chomp $hex;
    return $hex;
}
# get_complete()
#
# Loads the record of papers uploaded by earlier runs so they can be skipped.
#
# Reads status-uploaded.txt from the current directory, one paper ID per
# line. A missing file is not an error: it simply means nothing has been
# uploaded yet.
#
# Returns: a hash (as a flat list) mapping each ID found in the file to the
#          number of times it appears; empty when the file cannot be read.
sub get_complete {
    my $file = 'status-uploaded.txt';
    my %complete;

    my $in;
    unless (open $in, '<', $file) {
        # Only complain when the file is there but unreadable.
        warn "Cannot read $file: $!\n" if -e $file;
        return %complete;
    }

    # Read line by line from the handle; each line is one completed paper.
    while (my $line = <$in>) {
        chomp $line;
        $complete{$line}++;
    }
    close $in;

    return %complete;
}