Operating on an mbox file with Perl

A Perl script for extracting information from an mbox file.

#~~~~~ Author Aaron chen ~~~~~~#
use Mail::Util qw(read_mbox);
use Mail::Internet;
use DBI;
#use strict;
#-------------------------Table fields -----------------------------#
#SENDER_ID           VARCHAR2(50) NOT NULL,      ---> sender's email id of the message
#SENDER_NAME         VARCHAR2(50),               ---> sender's name of the message
#SENT_TIME           DATE NOT NULL,              ---> send time of the message
#RECEIVE_TIME        DATE NOT NULL,              ---> receive time of the message
#SUBJECT             VARCHAR2(100),              ---> subject of the message.
#ATTACHMENT_NAME     VARCHAR2(50),               ---> attachment file name
#COTS_UPD_DATE       DATE,
#COTS_UPD_BY         VARCHAR2(50)

#--------$$$ Mail Read,Also include the Multipart $$$-----------#
#-- export SHLIB_PATH=/tmp  --For ORACLE SUPPORT #


# Read one mbox file and record every message in the COTS_TAB_READMAIL table.
#
# Parameters:
#   $mailbox - mailbox path prefix; the file actually read is
#              "$mailbox." followed by getMailDate('ymd').
#
# Steps:
#   1. Open a log file under /rcotreg1/wrk/log/servicelog/.
#   2. Count the messages with getMsgCount().
#   3. Connect to Oracle and fetch the current maximum MSG_NUM.
#   4. For each message extract sender, dates and subject. For multipart
#      delivery-status (bounce) messages the original sender, recipient and
#      subject are taken from the embedded message/rfc822 part instead.
#   5. Insert one row per message, committing after every insert.
#   6. Compare the number of rows stored for the processing date with the
#      total message count; on mismatch delete the rows for that date.
#
# Dies if the log file cannot be opened, the mailbox cannot be read, or a
# database call fails (the connection uses RaiseError).
#
# NOTE(review): the database credentials are hard-coded below; consider moving
# them to configuration.
# NOTE(review): because each insert is committed immediately, the "rollback"
# in step 6 is a DELETE of every row carrying the processing date - confirm
# that this is acceptable when the script is run twice for the same date.
sub mailRead {
    my ($mailbox) = @_;

    print "Start Messages processing.... at<" . getMailDate('hms') . ">\n";
    my $mbox = $mailbox . "." . getMailDate('ymd');
    print "Ready to read Messages From $mbox\n";

    #---- Create Log file -----#
    my $logfile =
      "/rcotreg1/wrk/log/servicelog/mailread_" . getMailDate('ymdhms') . ".log";
    print "Log is putting down in $logfile\n";

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the OS error is reported on failure.
    open( my $log, '>', $logfile )
      or die("Cannot open log file $logfile: $!\n");

    #----------- Obtain the mailfile basic information, such as message counts etc. ---------#
    print {$log} "Start Messages processing.... at<" . getMailDate('hms') . ">\n";
    my @m_c_count = getMsgCount($mbox);
    my $m_c_show =
      "Total Messages: $m_c_count[0]\t New Messages: $m_c_count[1]\t Unread Messages: $m_c_count[2]\n";
    print {$log} $m_c_show;

    #------------- Initial the connection to Oracle -------------#
    print {$log} "Start connect the database.....\n";
    my $dbh = DBI->connect(
        'dbi:Oracle:host=localhost;sid=a3cotr;port=5201',
        'RCOTRPR1',
        'RCOTRPR1',
        {
            RaiseError => 1,
            AutoCommit => 0
        }
    ) || die "Database connection not made: $DBI::errstr";

    #--------------- Get the max msg_num from readmail table ---------#
    # The query returns no row when the table is empty, so start from 0.
    my $msg_num = 0;
    my $sender_mail;
    my $sent_date;
    my $sql_num = qq{ SELECT MSG_NUM,SENDER_ID,TO_CHAR(SENT_TIME,'dd Mon yyyy HH24:MI:SS') FROM COTS_TAB_READMAIL
                WHERE MSG_NUM=(SELECT NVL(MAX(MSG_NUM),0) FROM COTS_TAB_READMAIL)};

    my $stm = $dbh->prepare($sql_num);
    $stm->execute() or die "Can't execute statement: $DBI::errstr";
    print {$log} "Database Connect Successfully!\n";

    while ( my @rows = $stm->fetchrow_array() ) {
        ( $msg_num, $sender_mail, $sent_date ) = @rows[ 0 .. 2 ];
        print {$log} "Retrieve The Max Msg_num From Readmail Table:\n";
        print {$log}
          "\t Msg_num:$msg_num\t Sender_mail:$sender_mail\t Sent_date:$sent_date\t From DB.\n";
    }

    #------ Construct the SQL for Insert data -------#
    my $sql = qq{INSERT INTO COTS_TAB_READMAIL VALUES
           (?,?,?,TO_DATE(?, 'dd mm yyyy  HH24:MI:SS'),
           TO_DATE(?, 'dd mm yyyy  HH24:MI:SS'),?,
           TO_DATE(?, 'yyyy-mm-dd'),
           sysdate,?)
           };

    my $sth = $dbh->prepare($sql);

    #-------------- Read messages in ------------------#
    print {$log} "Start Read mailbox...\n";
    my @msgs = read_mbox($mbox)
      or die "Can't read $mbox:$!";

    my $msg_count      = $msg_num;    # continues numbering after the stored maximum
    my $msg_number     = 0;           # zero-based position of the message, for the log
    my $msg_unread_num = 0;           # number of messages processed in this run
    my $process_date   = getMailDate('y-m-d');

    #------------ LOOP FOR THE MESSAGES -----------#
    # NOTE: a filter on the Status header ('RO' / 'O') was disabled in the
    # original code, so every message in the mailbox is processed.
    foreach my $raw_message (@msgs) {

        my $mail   = Mail::Internet->new($raw_message);
        my $header = $mail->head;
        my $body   = $mail->body;

        my @from      = $header->get("From");
        my $sender_id = getEmailID( $from[0] );
        print {$log} "Process Message $msg_number SENT_ID:  $sender_id\n";

        my @sent_time = $header->get("Date");
        my @received  = $header->get("Received");

        my ($content_type) = $header->get("Content-Type");
        $content_type = '' unless defined $content_type;
        chomp($content_type);

        #------ Values bound to the INSERT statement -------#
        my $reciever_id = '';
        my $sbj         = '';

        $msg_unread_num++;

        print {$log} "Extract Information From the Unread Messages.....\n";

        my $send_date = dateFormat( $sent_time[0] );

        # The receive time is the text after the ';' of the first Received header.
        my @rcd = defined $received[0] ? split( /;/, $received[0] ) : ();
        my $received_date = dateFormat( $rcd[1] );

        print {$log} "\n------ Find the original Subject in Multpart:Begin-Body -----\n";

        #----- For find the 'message/delivery-status' --------#
        #----- Get the subject from the original message -----#
        if ( $content_type =~ /multipart/ ) {
            print {$log} "~~~~Enter the multipart~~~~~\n";

            # The markers must appear in this order for the message to be
            # treated as a bounce carrying the original message:
            #   delivery-status part -> Final-Recipient -> Action -> rfc822 part
            my $in_status     = 0;
            my $has_recipient = 0;
            my $has_action    = 0;
            my $in_original   = 0;

            foreach my $bodyline ( @{$body} ) {
                if ( $bodyline =~ /Content-Type: message\/delivery-status/ ) {
                    print {$log} "~~~~~ Find message delivery-status~~~~~\n";
                    $in_status = 1;
                }
                if ( $in_status and $bodyline =~ /Final-Recipient:/ ) {
                    print {$log} "~~~~~ Find Final-recipient~~~~~\n";

                    # The recipient address follows the ';' on this line.
                    my ( undef, $recipient ) = split( /;/, $bodyline );
                    $recipient = '' unless defined $recipient;
                    print "The Final Receiver is $recipient\n";
                    $recipient =~ s/^\s+//;
                    $recipient =~ s/\s+$//;
                    $reciever_id   = $recipient;
                    $has_recipient = 1;
                }
                if ( $has_recipient and $bodyline =~ /Action:/ ) {
                    print {$log} "~~~~~ Find Action~~~~~\n";
                    $has_action = 1;
                }
                if ( $has_action and $bodyline =~ /Content-Type: message\/rfc822/ ) {
                    print {$log} "~~~~~ Find rfc822~~~~~\n";
                    $in_original = 1;
                    print {$log} "now vector3:T";
                }
                if ( $in_original and $bodyline =~ /From:/ ) {
                    print {$log} "~~~~~ Find original Sender_id~~~~~\n";
                    my ( undef, $original_from ) = split( /From:/, $bodyline );
                    $sender_id = getEmailID($original_from);
                }
                if ( $in_original and $bodyline =~ /Subject:/ ) {
                    print {$log} "~~~~~ Find original subject~~~~~\n";
                    my ( undef, $original_subject ) = split( /Subject:/, $bodyline );
                    $original_subject = '' unless defined $original_subject;
                    $original_subject =~ s/^\s+//;    #---- Trim whitespace
                    $original_subject =~ s/\s+$//;    #---- Trim whitespace
                    $sbj = $original_subject;
                    last;    # the subject is the last item needed from the body
                }
            }

            if ( !$in_original ) {
                # Multipart, but not a bounce with an embedded original message.
                print {$log} "It's a Multipart mail,but not meet requirement!\n";
                $sbj = _mailReadHeaderSubject($header);
                print {$log} "The Subject is: $sbj\n";
            }
            print {$log} "\n------- Find the original Subject in Multpart:End-Body ------\n";
        }
        else {
            #---------- Get the Subject from header:FAX or SinglePart -----------#
            $sbj = _mailReadHeaderSubject($header);
            print {$log} "The Subject is: $sbj\n";
        }

        $sbj = '' unless defined $sbj;
        $msg_count++;

        #-------- Data bind ------------#
        $sth->bind_param( 1, $msg_count );        # message number, continuing from the stored maximum
        $sth->bind_param( 2, $sender_id );        # sender e-mail address
        $sth->bind_param( 3, $reciever_id );      # empty for normal mail, Final-Recipient for bounces
        $sth->bind_param( 4, $send_date );        # sent time, e.g. '19 Jul 2007 14:20:55'
        $sth->bind_param( 5, $received_date );    # receive time, same layout
        $sth->bind_param( 6, $sbj );              # subject
        $sth->bind_param( 7, $process_date );     # processing date, 'yyyy-mm-dd'
        $sth->bind_param( 8, 'CHENPING' );

        #------ INSERT DATA TO DB ------ #
        $sth->execute() or die "Can't execute statement: $DBI::errstr";
        $dbh->commit;
        print {$log} "$sender_id, Insert successfully!\n";

        $msg_number++;
    }    # --- End the @msgs Loop

    #----- Confirm the inserted entries; if they do not match the message count, remove them ---#
    my $sql_cfm = qq{ SELECT COUNT(*) FROM COTS_TAB_READMAIL
    WHERE PROCESSED_DATE=TO_DATE(?,'yyyy-mm-dd')};

    print {$log} "$sql_cfm\n";
    my $stm_cfm = $dbh->prepare($sql_cfm);
    $stm_cfm->bind_param( 1, $process_date );
    $stm_cfm->execute() or die "Can't execute statement: $DBI::errstr";
    my $msg_cfm = 0;
    while ( my @datasets = $stm_cfm->fetchrow_array() ) {
        $msg_cfm = $datasets[0];    # number of rows stored for the processing date
        print {$log} "Retrieve the counts which inserted into Readmail table: $msg_cfm\n";
    }

    my $sql_delete = qq{ DELETE FROM COTS_TAB_READMAIL
   WHERE PROCESSED_DATE=TO_DATE(?,'yyyy-mm-dd')};

    if ( $msg_cfm != $m_c_count[0] ) {
        print "!!!All Messages are not read!!!\n";
        print {$log} "!!!All Messages are not read, Unread counts : $m_c_count[0] !!!\n";
        my $stm_delete = $dbh->prepare($sql_delete);
        $stm_delete->bind_param( 1, $process_date );
        $stm_delete->execute() or die "Can't execute statement: $DBI::errstr";
        $dbh->commit;
        print {$log} "Data,Rollback!\n";
        print "Data,Rollback!\n";
    }
    else {
        print {$log} "$m_c_count[0] Messages Read And Update to DB!\n";
        print "$m_c_count[0] Messages Read And Update to DB!\n";
    }

    print {$log} "All Messages Counts: $msg_unread_num \n";

    #------- Close the connection ------#
    $dbh->disconnect();
    print "Finish Messages Processing.... at<" . getMailDate('hms') . ">\n";

    # Write the final log line BEFORE closing the handle; the original closed
    # the log first, so this line was never written.
    print {$log} "Finish Messages Processing.... at<" . getMailDate('hms') . ">\n";
    close($log) or warn "Cannot close log file $logfile: $!\n";
    return;
}

# Private helper of mailRead: returns the Subject header value(s) of a
# Mail::Header object joined into one string, with leading and trailing
# whitespace removed. Returns '' when there is no Subject header.
sub _mailReadHeaderSubject {
    my ($header) = @_;

    my $subject = join( '', $header->get("Subject") );
    $subject =~ s/^\s+//;
    $subject =~ s/\s+$//;
    return $subject;
}

#-----------$$$ Helper Subroutine $$$----------#

# Count the messages in an mbox file by their Status header.
#
# Parameters:
#   $mbox - path of the mbox file to read.
#
# Returns a three-element list:
#   [0] total number of messages,
#   [1] messages without a Status header (counted as "new"),
#   [2] messages without a Status header or whose Status is exactly 'O'
#       (counted as "unread").
#
# Dies with "Can't read <file>" when read_mbox yields no messages.
sub getMsgCount {
    my ($mbox) = @_;

    my @messages = read_mbox($mbox) or die "Can't read $mbox";

    my ( $total, $fresh, $pending ) = ( 0, 0, 0 );

    for my $raw (@messages) {

        # Only the first Status value of the message is considered.
        my ($flag) = Mail::Internet->new($raw)->head->get('Status');
        chomp($flag);

        if ( !defined $flag ) {
            $fresh++;
            $pending++;
        }
        $pending++ if $flag eq 'O';

        $total++;
    }

    my @counts = ( $total, $fresh, $pending );
    return @counts;
}

#----- Extract the date and time from a mail date string ------#
# Reduces a header date such as "Thu, 19 Jul 2007 14:20:55 +0800" to
# "19 Jul 2007 14:20:55": the optional day-of-week prefix and the time zone
# are removed.
#
# Parameters:
#   $st_time - date text taken from a Date header, or the part of a Received
#              header that follows the ';'. May be undef.
#
# Returns the "day month year time" part with surrounding whitespace removed,
# or undef when no date can be found.
#
# Unlike the original comma/sign splitting, the day-of-week prefix may be
# missing and the zone may be alphabetic ("GMT") or absent.
sub dateFormat {
    my ($st_time) = @_;

    my $stamp;
    return $stamp unless defined $st_time;

    # Preferred path: locate "<day> <month name> <year> <hh:mm[:ss]>" anywhere
    # in the text, regardless of what precedes or follows it.
    if ( $st_time =~ /(\d{1,2}\s+[A-Za-z]{3,}\s+\d{2,4}\s+\d{1,2}:\d{2}(?::\d{2})?)/ ) {
        $stamp = $1;
        return $stamp;
    }

    # Fallback, kept for strings that do not use a month name: take the text
    # after the first comma and cut it at the sign of the time zone.
    my @dateformat_temp = split( /,/, $st_time );
    my $tail = $dateformat_temp[1];
    return $stamp unless defined $tail;

    if ( $tail =~ /\+/ ) {
        ($stamp) = split( /\+/, $tail );
    }
    elsif ( $tail =~ /-/ ) {
        ($stamp) = split( /-/, $tail );
    }

    if ( defined $stamp ) {
        $stamp =~ s/^\s+//;
        $stamp =~ s/\s+$//;
    }
    return $stamp;
}

#------ Extract the sender e-mail address and validate its length ------#
# Parameters:
#   $mailid - content of a From header, either a bare address or
#             "Display Name <address>". May be undef.
#
# Returns:
#   - the address (the text inside <...> when angle brackets are present,
#     otherwise the whole trimmed string) when it is at most 50 characters;
#   - the string "illegal email" when the address is longer than 50
#     characters or when a '<' is present without a matching '>'.
#
# When several <...> groups are present, the first one containing '@' is
# used, so a display name that itself contains angle brackets does not hide
# the real address.
sub getEmailID {
    my ($mailid) = @_;

    my $max_length = 50;    # size of the SENDER_ID column

    $mailid = '' unless defined $mailid;
    $mailid =~ s/^\s+//;
    $mailid =~ s/\s+$//;    #----- Trim the string at both ends

    my $address = $mailid;
    if ( $mailid =~ /</ ) {
        my @bracketed = $mailid =~ /<([^<>]*)>/g;

        # A '<' without a closing '>' is malformed. The original fell off the
        # end of the sub here and returned no usable value.
        return "illegal email" unless @bracketed;

        ($address) = grep { /\@/ } @bracketed;
        $address = $bracketed[0] unless defined $address;
    }

    return length($address) <= $max_length ? $address : "illegal email";
}

# Format the local date/time of exactly 24 hours ago (time - 86400).
#
# Parameters:
#   $timeformat - one of:
#       'ymd'    -> yyyymmdd
#       'y-m-d'  -> yyyy-mm-dd
#       'hms'    -> HH:MM:SS
#       'ymdhms' -> yyyymmdd_HH:MM:SS
#     Any other value (including undef) is treated as 'ymd'.
#
# Returns the formatted string. All numeric fields are zero-padded.
#
# Fixes relative to the original:
#   - the 'ymdhms' branch interpolated "$Day_", an undefined variable, so the
#     day of month and the underscore were missing from the result;
#   - day, hour, minute and second were not zero-padded, contradicting the
#     documented yyyymmdd / HH:MM:SS layouts.
# NOTE(review): the padded day changes the 'ymd' mailbox suffix on days 1-9
# (e.g. 20070705 instead of 2007075) - confirm the mailbox files are named
# with a two-digit day.
sub getMailDate {
    my ($timeformat) = @_;
    $timeformat = 'ymd' unless defined $timeformat;

    my ( $second, $minute, $hour, $day, $month, $year ) =
      localtime( time - 86400 );
    $year  += 1900;
    $month += 1;    # localtime months are 0-based

    my $hms = sprintf( '%02d:%02d:%02d', $hour, $minute, $second );
    my $ymd = sprintf( '%04d%02d%02d',   $year, $month,  $day );

    if ( $timeformat eq 'y-m-d' ) {
        return sprintf( '%04d-%02d-%02d', $year, $month, $day );
    }
    if ( $timeformat eq 'hms' ) {
        return $hms;
    }
    if ( $timeformat eq 'ymdhms' ) {
        return $ymd . '_' . $hms;
    }

    # 'ymd' and every unrecognised format.
    return $ymd;
}


# Create "<$logfile>.log" and write a single test line into it.
#
# Parameters:
#   $logfile - path of the log file without the ".log" extension.
#
# While a file with the target name already exists the sub waits, checking
# again every 2 seconds; it only proceeds once the file is gone.
#
# Returns 1 on success. Dies when the file cannot be opened or closed.
sub poolLog {
    my ($logfile) = @_;

    # --- create a log file --- #
    $logfile = $logfile . ".log";
    while ( -e $logfile ) {
        sleep 2;
    }

    # --- Write the log in the file --- #
    # Three-argument open: the name is used literally, so leading whitespace
    # or mode characters in $logfile cannot change how the file is opened.
    open( my $log_fh, '>', $logfile )
      or die("Cannot open log file $logfile: $!\n");
    print {$log_fh} "Just for read mail test\n";

    # Buffered write errors only surface at close time.
    close($log_fh) or die("Cannot close log file $logfile: $!\n");

    return 1;
}

#-------------- TEST MAIN -----------------#
# Script entry point: process the mailbox whose path prefix is "rcotbg1".
mailRead("rcotbg1");

# Disabled command-line handling, kept for reference:
#use Getopt::Std;
#use vars qw($opt_d $opt_f $opt_p);
#getopts('d:f:p');

#print &getMailFileDate()  if $opt_d;
#print &getMailDate()      if $opt_f;
#print "helloworld"        if $opt_p;

# Disabled ad-hoc test calls:
#&getMsgCount('D:/mbox/rcotbg2.txt');
#&poolLog('test');

 

 

 

你可能感兴趣的:(oracle,sql,c,qq,perl)