# A Perl script for extracting information from an mbox mail file.
#~~~~~ Author Aaron chen ~~~~~~#
use Mail::Util qw(read_mbox);
use Mail::Internet;
use DBI;
#use strict;
#-------------------------Table fields -----------------------------#
#SENDER_ID VARCHAR2(50) NOT NULL, ---> sender's email id of the message
#SENDER_NAME VARCHAR2(50), ---> sender's name of the message
#SENT_TIME DATE NOT NULL, ---> send time of the message
#RECEIVE_TIME DATE NOT NULL, ---> receive time of the message
#SUBJECT VARCHAR2(100), ---> subject of the message.
#ATTACHMENT_NAME VARCHAR2(50), ---> attachment file name
#COTS_UPD_DATE DATE,
#COTS_UPD_BY VARCHAR2(50)
#--------$$$ Mail Read,Also include the Multipart $$$-----------#
#-- export SHLIB_PATH=/tmp --For ORACLE SUPPORT #
# mailRead($mailbox)
#
# Reads every message of the mbox file "<$mailbox>.<getMailDate('ymd')>" and
# inserts one row per message into COTS_TAB_READMAIL, writing a progress log
# to /rcotreg1/wrk/log/servicelog/mailread_<stamp>.log.
#
# Parameters:
#   $mailbox - path prefix of the mailbox file (the date suffix is appended here).
#
# Behaviour:
#   * Message numbers continue from the highest MSG_NUM already in the table.
#   * For multipart messages the body is scanned for a delivery-status report;
#     when the embedded original message is found, the final recipient, the
#     original sender and the original subject replace the header values.
#   * Each insert is committed on its own. Afterwards the number of rows
#     carrying this run's processing date is compared with the total message
#     count; on a mismatch all rows with that processing date are deleted.
#
# Dies when the log file cannot be opened, the mailbox cannot be read or is
# empty, or any database call fails (the handle is created with RaiseError).
#
# NOTE(review): the database credentials are hard-coded in the connect call.
# NOTE(review): the confirmation count is per processing date, not per run, so
#               a second run for the same date will mismatch and delete the
#               rows of both runs - confirm this is intended.
sub mailRead {
    my ($mailbox) = @_;

    print "Start Messages processing.... at<" . getMailDate('hms') . ">\n";
    my $mbox = $mailbox . "." . getMailDate('ymd');
    print "Ready to read Messages From $mbox\n";

    #---- Create Log file -----#
    my $logfile = "/rcotreg1/wrk/log/servicelog/mailread_" . getMailDate('ymdhms') . ".log";
    print "Log is putting down in $logfile\n";

    # Three-argument open with a lexical handle, so the file name can never be
    # interpreted as an open mode.
    open( my $log, '>', $logfile ) || die("Cannot open log file!\n");

    #----------- Obtain the mailfile basic information, such as message counts etc. ---------#
    print {$log} "Start Messages processing.... at<" . getMailDate('hms') . ">\n";
    my @m_c_count = getMsgCount($mbox);    # (total, new, unread)
    my $m_c_show =
      "Total Messages: $m_c_count[0]\t New Messages: $m_c_count[1]\t Unread Messages: $m_c_count[2]\n";
    print {$log} $m_c_show;

    #------------- Initial the connection to Oracle -------------#
    print {$log} "Start connect the database.....\n";
    my $dbh = DBI->connect(
        'dbi:Oracle:host=localhost;sid=a3cotr;port=5201',
        'RCOTRPR1',
        'RCOTRPR1',
        {
            RaiseError => 1,
            AutoCommit => 0
        }
    ) || die "Database connection not made: $DBI::errstr";

    #--------------- Get the max msg_num from readmail table ---------#
    my $msg_num;
    my $sender_mail;
    my $sent_date;
    my $sql_num =
        " SELECT MSG_NUM,SENDER_ID,TO_CHAR(SENT_TIME,'dd Mon yyyy HH24:MI:SS') FROM COTS_TAB_READMAIL\n"
      . "WHERE MSG_NUM=(SELECT NVL(MAX(MSG_NUM),0) FROM COTS_TAB_READMAIL)";
    my $stm = $dbh->prepare($sql_num);
    $stm->execute() or die "Can't execute statement: $DBI::errstr";
    print {$log} "Database Connect Successfully!\n";
    while ( my @rows = $stm->fetchrow_array() ) {
        ( $msg_num, $sender_mail, $sent_date ) = @rows[ 0 .. 2 ];
        print {$log} "Retrieve The Max Msg_num From Readmail Table:\n";
        print {$log}
          "\t Msg_num:$msg_num\t Sender_mail:$sender_mail\t Sent_date:$sent_date\t From DB.\n";
    }

    #------ Construct the SQL for Insert data -------#
    my $sql = qq{INSERT INTO COTS_TAB_READMAIL VALUES
        (?,?,?,TO_DATE(?, 'dd mm yyyy HH24:MI:SS'),
        TO_DATE(?, 'dd mm yyyy HH24:MI:SS'),?,
        TO_DATE(?, 'yyyy-mm-dd'),
        sysdate,?)
    };
    my $sth = $dbh->prepare($sql);

    #-------------- Read messages in ------------------#
    print {$log} "Start Read mailbox...\n";
    my @msgs = read_mbox($mbox)
      or die "Can't read $mbox:$!";

    # $msg_num stays undef when the table is empty; the first ++ then yields 1.
    my $msg_count      = $msg_num;                  # running MSG_NUM for the inserts
    my $msg_number     = 0;                         # zero-based index shown in the log
    my $msg_unread_num = 0;                         # number of messages processed
    my $process_date   = getMailDate('y-m-d');

    #------------ LOOP FOR THE MESSAGES -----------#
    foreach my $raw_msg (@msgs) {
        my $mail   = Mail::Internet->new($raw_msg);
        my $header = $mail->head;
        my $body   = $mail->body;

        my @from      = $header->get("From");
        my $sender_id = getEmailID( $from[0] );
        print {$log} "Process Message $msg_number SENT_ID: $sender_id\n";

        my @sent_time = $header->get("Date");
        my @received  = $header->get("Received");
        my ($content_type) = $header->get("Content-Type");
        $content_type = '' if !defined $content_type;
        chomp($content_type);

        #------ Values bound to the INSERT -------#
        my $reciever_id = '';
        my $sbj         = '';

        # Filtering on the Status header is disabled: every message is processed.
        $msg_unread_num++;
        print {$log} "Extract Information From the Unread Messages.....\n";
        my $send_date = dateFormat( $sent_time[0] );

        # The receive timestamp is the text after ';' in the first Received header.
        my @rcd           = split( /;/, $received[0] );
        my $received_date = dateFormat( $rcd[1] );

        print {$log} "\n------ Find the original Subject in Multpart:Begin-Body -----\n";

        if ( $content_type =~ /multipart/ ) {
            print {$log} "~~~~Enter the multipart~~~~~\n";

            # Markers must be seen in this order before the embedded original
            # message's From:/Subject: lines are trusted.
            my ( $seen_status, $seen_recipient, $seen_action, $seen_original ) = ( 0, 0, 0, 0 );

            foreach my $bodyline ( @{$body} ) {
                if ( $bodyline =~ /Content-Type: message\/delivery-status/ ) {
                    print {$log} "~~~~~ Find message delivery-status~~~~~\n";
                    $seen_status = 1;
                }
                if ( ( $bodyline =~ /Final-Recipient:/ ) and $seen_status ) {
                    print {$log} "~~~~~ Find Final-recipient~~~~~\n";

                    # The recipient address is the text after ';' on this line.
                    my @final_receiver = split( /;/, $bodyline );
                    print "The Final Receiver is $final_receiver[1]\n";
                    my $temp_rc = $final_receiver[1];
                    $temp_rc =~ s/^\s+//g;
                    $temp_rc =~ s/\s+$//g;
                    $reciever_id    = $temp_rc;
                    $seen_recipient = 1;
                }
                if ( ( $bodyline =~ /Action:/ ) and $seen_recipient ) {
                    print {$log} "~~~~~ Find Action~~~~~\n";
                    $seen_action = 1;
                }
                if ( ( $bodyline =~ /Content-Type: message\/rfc822/ ) and $seen_action ) {
                    print {$log} "~~~~~ Find rfc822~~~~~\n";
                    $seen_original = 1;
                    print {$log} "now vector3:" . 'T';
                }
                if ( ( $bodyline =~ /From:/ ) and $seen_original ) {
                    print {$log} "~~~~~ Find original Sender_id~~~~~\n";
                    my @sd = split( /From:/, $bodyline );
                    $sender_id = getEmailID( $sd[1] );
                }
                if ( ( $bodyline =~ /Subject:/ ) and $seen_original ) {
                    print {$log} "~~~~~ Find original subject~~~~~\n";
                    my @sb = split( /Subject:/, $bodyline );
                    $sb[1] =~ s/^\s+//g;    #---- Trim whitespace
                    $sb[1] =~ s/\s+$//g;    #---- Trim whitespace
                    $sbj = $sb[1];
                    last;                   # the original subject is the last item needed
                }
            }

            if ( !$seen_original ) {

                # Multipart, but no embedded original message: use the header subject.
                my @subject = $header->get("Subject");
                print {$log} "It's a Multipart mail,but not meet requirement!\n";
                my $temp = join( '', @subject );
                $temp =~ s/^(\s)+//g;       #---- Trim whitespace
                $temp =~ s/\s+$//g;         #---- Trim whitespace
                $sbj = $temp;
                print {$log} "The Subject is: $sbj\n";
            }
            print {$log} "\n------- Find the original Subject in Multpart:End-Body ------\n";
        }
        else {
            #---------- Get the Subject from header:FAX or SinglePart -----------#
            my @subject = $header->get("Subject");
            my $temp = join( '', @subject );
            $temp =~ s/^(\s)+//g;           #---- Trim whitespace
            $temp =~ s/\s+$//g;             #---- Trim whitespace
            $sbj = $temp;
            print {$log} "The Subject is: $sbj\n";
        }
        $sbj = '' if !defined $sbj;

        $msg_count++;

        #-------- Data bind ------------#
        $sth->bind_param( 1, $msg_count );        # message number
        $sth->bind_param( 2, $sender_id );        # sender address
        $sth->bind_param( 3, $reciever_id );      # '' for normal mail, final recipient for bounces
        $sth->bind_param( 4, $send_date );        # sent time
        $sth->bind_param( 5, $received_date );    # receive time
        $sth->bind_param( 6, $sbj );              # subject
        $sth->bind_param( 7, $process_date );     # processed date
        $sth->bind_param( 8, 'CHENPING' );        # presumably the "updated by" column - verify

        #------ INSERT DATA TO DB ------ #
        $sth->execute() or die "Can't execute statement: $DBI::errstr";
        $dbh->commit;
        print {$log} "$sender_id, Insert successfully!\n";

        $msg_number++;
    }    # --- End the @msgs Loop

    #----- Confirm the inserted entries; if they do not match the message count, remove them ---#
    my $sql_cfm =
        " SELECT COUNT(*) FROM COTS_TAB_READMAIL\n"
      . "WHERE PROCESSED_DATE=TO_DATE(?,'yyyy-mm-dd')";
    print {$log} "$sql_cfm\n";
    my $stm_cfm = $dbh->prepare($sql_cfm);
    $stm_cfm->bind_param( 1, $process_date );
    $stm_cfm->execute() or die "Can't execute statement: $DBI::errstr";
    my $msg_cfm;
    while ( my @datasets = $stm_cfm->fetchrow_array() ) {
        $msg_cfm = $datasets[0];    # rows stored for this processing date
        print {$log} "Retrieve the counts which inserted into Readmail table: $msg_cfm\n";
    }

    my $sql_delete = qq{ DELETE FROM COTS_TAB_READMAIL
        WHERE PROCESSED_DATE=TO_DATE(?,'yyyy-mm-dd')};
    if ( $msg_cfm != $m_c_count[0] ) {
        print "!!!All Messages are not read!!!\n";
        print {$log} "!!!All Messages are not read, Unread counts : $m_c_count[0] !!!\n";

        # The inserts were already committed, so the "rollback" is a DELETE.
        my $stm_delete = $dbh->prepare($sql_delete);
        $stm_delete->bind_param( 1, $process_date );
        $stm_delete->execute() or die "Can't execute statement: $DBI::errstr";
        $dbh->commit;
        print {$log} "Data,Rollback!\n";
        print "Data,Rollback!\n";
    }
    else {
        print {$log} "$m_c_count[0] Messages Read And Update to DB!\n";
        print "$m_c_count[0] Messages Read And Update to DB!\n";
    }
    print {$log} "All Messages Counts: $msg_unread_num \n";

    #------- Close the connection ------#
    $dbh->disconnect();
    print "Finish Messages Processing.... at<" . getMailDate('hms') . ">\n";

    # Write the closing log line BEFORE closing the handle; it used to be
    # printed after close() and was silently lost.
    print {$log} "Finish Messages Processing.... at<" . getMailDate('hms') . ">\n";
    close($log);
}
#-----------$$$ Helper Subroutine $$$----------#
# getMsgCount($mbox)
#
# Reads the mbox file $mbox and classifies its messages by their Status header.
# Returns a three-element list: (all messages, messages without a Status
# header, messages without a Status header plus those whose Status is 'O').
# Dies when read_mbox yields no messages.
sub getMsgCount {
    my ($mbox) = @_;

    my @msgs = read_mbox($mbox) or die "Can't read $mbox";

    my ( $total, $fresh, $unread ) = ( 0, 0, 0 );

    for my $raw (@msgs) {

        # Only the first Status header of a message is examined.
        my ($status) = Mail::Internet->new($raw)->head->get('Status');
        $total++;

        if ( defined $status ) {
            chomp $status;
            $unread++ if $status eq 'O';
        }
        else {
            # No Status header at all: counted as both new and unread.
            $fresh++;
            $unread++;
        }
    }

    my @counts = ( $total, $fresh, $unread );
    return @counts;
}
#----- Extract the Date Format from the sent_time ------#
# dateFormat($st_time)
#
# Reduces a mail date such as "Thu, 19 Jul 2007 14:20:55 +0800" to the bare
# "19 Jul 2007 14:20:55" part, with surrounding whitespace removed.
#
# Parameters:
#   $st_time - raw date text (a Date header value, or the text after ';' in a
#              Received header); may carry a trailing newline or comment.
#
# Returns the trimmed date/time text, or undef when there is nothing to parse.
#
# Handled forms:
#   * "Day, <date> <time> +hhmm" / "-hhmm" - the text between the first and the
#     second comma is used, cut at the first '+' or '-'.
#   * no "Day," prefix (the day of week is optional in RFC 2822) - the whole
#     string is used instead of giving up.
#   * a named zone such as "GMT" after the time - the zone name is stripped.
sub dateFormat {
    my ($st_time) = @_;
    return if !defined $st_time;

    my @pieces = split( /,/, $st_time );

    # With a "Day," prefix the date is the second field; without one it is the
    # first (and only) field.
    my $stamp = @pieces > 1 ? $pieces[1] : $pieces[0];
    return if !defined $stamp;

    if ( $stamp =~ /[+-]/ ) {

        # Drop the numeric zone offset and everything after it.
        $stamp =~ s/[+-].*\z//s;
    }
    else {

        # Drop an alphabetic zone name that directly follows the time.
        $stamp =~ s/(?<=:\d\d)\s+[A-Za-z]{1,5}\s*\z//;
    }

    $stamp =~ s/^\s+//;
    $stamp =~ s/\s+$//;
    return $stamp;
}
#------ Extract the sender's email ID and validate its length ------#
# getEmailID($mailid)
#
# Extracts the bare email address from a header value.
#
# Parameters:
#   $mailid - header text, either a plain address or "Name <address>".
#
# Returns:
#   * the text inside the first <...> pair when the value contains '<',
#     otherwise the whitespace-trimmed value itself;
#   * the string "illegal email" when that address is longer than 50
#     characters, or when a '<' is present without a matching '>';
#   * undef when $mailid is undef.
#
# Previously an address followed by trailing text ("Name <a@b> (comment)") or
# an unterminated '<' fell off the end of the sub and produced an incidental
# empty value; both cases now give a defined result.
sub getEmailID {
    my ($mailid) = @_;
    return $mailid if !defined $mailid;

    #----- Trim leading and trailing whitespace
    $mailid =~ s/^\s+//;
    $mailid =~ s/\s+$//;

    my $address = $mailid;
    if ( $mailid =~ /</ ) {

        # An opening bracket with no closing one cannot be an address.
        return "illegal email" unless $mailid =~ /<([^<>]*)>/;
        $address = $1;
    }

    # The address must be at most 50 characters long.
    return length($address) < 51 ? $address : "illegal email";
}
# getMailDate($timeformat)
#
# Returns a formatted timestamp for the moment exactly 24 hours before now
# (local time).
#
# Parameters:
#   $timeformat - one of:
#       'ymd'    -> yyyymmdd
#       'y-m-d'  -> yyyy-mm-dd
#       'hms'    -> HH:MM:SS
#       'ymdhms' -> yyyymmdd_HH:MM:SS
#     any other value (or undef) is treated like 'ymd'.
#
# All fields are zero-padded. The 'ymdhms' form used to interpolate a
# non-existent variable "$Day_" and therefore lost both the day and the
# underscore; it now contains them.
#
# NOTE(review): the 24-hour shift is applied to the time-of-day formats too;
#               presumably the previous day's mailbox is processed - confirm.
sub getMailDate {
    my ($timeformat) = @_;
    $timeformat = '' if !defined $timeformat;

    my ( $second, $minute, $hour, $day, $month, $year ) = localtime( time - 86400 );
    $year  += 1900;    # localtime counts years from 1900
    $month += 1;       # localtime months are 0-based

    my $ymd = sprintf( '%04d%02d%02d',   $year, $month,  $day );
    my $hms = sprintf( '%02d:%02d:%02d', $hour, $minute, $second );

    if ( $timeformat eq 'y-m-d' ) {
        return sprintf( '%04d-%02d-%02d', $year, $month, $day );
    }
    if ( $timeformat eq 'hms' ) {
        return $hms;
    }
    if ( $timeformat eq 'ymdhms' ) {
        return $ymd . '_' . $hms;
    }

    # 'ymd' and every unrecognised format
    return $ymd;
}
# poolLog($logfile)
#
# Creates "<$logfile>.log" and writes a single test line into it.
#
# Parameters:
#   $logfile - path of the log file without the ".log" extension.
#
# While a file of that name already exists the sub polls every two seconds and
# only proceeds once it is gone, so it blocks indefinitely if nothing removes
# the file.
#
# Dies when the file cannot be opened or the write cannot be flushed on close.
sub poolLog {
    my ($logfile) = @_;

    # --- create a log file --- #
    $logfile = $logfile . ".log";
    while ( -e $logfile ) {
        sleep 2;
    }

    # --- Write the log in the file --- #
    # Three-argument open with a lexical handle: the path is never parsed as
    # an open mode, and the handle is scoped to this sub.
    open( my $logs, '>', $logfile )
      or die("Cannot open log file $logfile: $!\n");
    print {$logs} "Just for read mail test\n";

    # Buffered write errors only surface at close time.
    close($logs) or die("Cannot close log file $logfile: $!\n");
}
#-------------- TEST MAIN -----------------#
# Script entry point: process the mailbox whose path prefix is "rcotbg1".
mailRead("rcotbg1");
#use Getopt::Std;
#use vars qw($opt_d $opt_f $opt_p);
#getopts('d:f:p');
#print &getMailFileDate() if $opt_d;
#print &getMailDate() if $opt_f;
#print "helloworld" if $opt_p;
#&getMsgCount('D:/mbox/rcotbg2.txt');
#&poolLog('test');