Just a Crawler

  
  
  
  
use strict;
use warnings;
use WWW::Mechanize;
use HTTP::Cookies;

# Log in to a Google account, then fetch the Google Trends CSV export for a
# keyword and write the response body to a file.
#
# Usage: perl <script> USERNAME PASSWORD KEYWORD OUTPUTFILE
#   USERNAME    value for the "Email" field of the login form
#   PASSWORD    value for the "Passwd" field of the login form
#   KEYWORD     search term placed in the "q" parameter of the Trends URL
#   OUTPUTFILE  path of the file that receives the downloaded page
#
# Dies with a message on missing arguments, on any HTTP failure (autocheck),
# when the login form cannot be found, and on any file I/O error.

###go to login page and login.
#my $url = 'https://www.google.com/accounts/ServiceLogin?hl=en&service=finance&nui=1&continue=http%3A%2F%2Ffinance.google.com%2Ffinance';
my $url = 'https://accounts.google.com/ServiceLogin';

@ARGV >= 4
    or die "Usage: $0 USERNAME PASSWORD KEYWORD OUTPUTFILE\n";

my ($username, $password, $keyword, $outputfile) = @ARGV[0 .. 3];
chomp($username, $password, $keyword, $outputfile);

print "usr: $username\n";
# Never echo the real password: stdout may end up in logs or shell history.
print "psw: ********\n";
print "keyword: $keyword\n";
print "output: $outputfile\n";
print "Searching ......\n";

# autocheck makes every failed HTTP request die instead of being ignored.
my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->cookie_jar(HTTP::Cookies->new());
$mech->get($url);
$mech->form_number(1)
    or die "No login form found at $url\n";
$mech->field(Email  => $username);
$mech->field(Passwd => $password);
$mech->click();

# Percent-encode bytes that would corrupt the query string (space, '&', '#',
# non-ASCII, ...). '+', '%' and ',' are deliberately passed through so that
# keywords which are already URL-encoded (e.g. "alan+kay") yield the same URL
# as before.
my $query = $keyword;
utf8::encode($query) if utf8::is_utf8($query);
$query =~ s/([^A-Za-z0-9_.~+%,-])/sprintf '%%%02X', ord $1/ge;

#Go to the next link, now that we are logged in.
#$url = 'http://www.google.com/trends/viz?q=alan+kay&graph=all_csv&sa=N';
$url = 'http://www.google.com/trends/viz?q=' . $query . '&date=all&geo=cn&graph=all_csv&scale=1&sa=N';
#$url = 'http://finance.google.com/finance/portfolio?action=view&pid=1&pview=pview&output=csv';

$mech->get($url);
my $output_page = $mech->content();

# Three-argument open keeps characters such as '>' or '|' in the file name
# from being interpreted as an open mode. The UTF-8 layer assumes content()
# returned decoded characters -- NOTE(review): confirm for this response type.
open my $fh, '>:encoding(UTF-8)', $outputfile
    or die "Cannot open '$outputfile' for writing: $!\n";
print {$fh} $output_page
    or die "Cannot write to '$outputfile': $!\n";
# Buffered write errors only surface at close, so check it as well.
close $fh
    or die "Cannot close '$outputfile': $!\n";


12/4/2011 Update

This script sometimes fails because Google blocks automated requests.


你可能感兴趣的:(Google,url,email,csv,login,output)