#!/usr/bin/python # -*- coding: utf-8 -*- """ 用于切分 nginx日志 nginx日志 格式: log_format access '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" $http_x_forwarded_for'; INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('2', '2', '2', '2', '2', '2', '2', '2', '2', '2') table sql: CREATE TABLE `nginxlog` ( `status` int(4) DEFAULT NULL, `remote_user` varchar(20) DEFAULT NULL, `http_referer` text, `remote_addr` varchar(20) DEFAULT NULL, `http_x_forwarded_for` varchar(20) DEFAULT NULL, `hostname` varchar(50) DEFAULT NULL, `request` varchar(200) DEFAULT NULL, `request_type` varchar(10) DEFAULT NULL, `http_user_agent` varchar(200) DEFAULT NULL, `time_local` int(10) unsigned DEFAULT NULL, `server_id` int(5) unsigned DEFAULT NULL ) ENGINE=MyISAM DEFAULT CHARSET=utf8 """ #--------------------------config---------------------------------------------------------------------- #config mysql = {} mysql['mhost'] = 'localhost' mysql['muser'] = 'root' mysql['mpwd'] = '' mysql['mport'] = '3306' mysql['mdbname'] = 'nginxlog' #获取当前执行目录 工作目录 ROOT = '/home/nginxweblog/app/' #ftp 目录 用于存放生产 原始 日志 Sourcedatadir = '/home/nginxweblog/sourcedata/' #log run Logfile = ROOT+'nginxlog.log' #--------------------------------import-------------------------------------------------------- import re,time,os import MySQLdb #--------------------------------Sublog-------------------------------------------------------- #分析 每行的 nginxlog 数据 class Sublog: def __init__(self, logline=None): if logline: self.logline = logline.strip() self.logdate = {} #self.run() def run(self): if self.logline: self.splitlog() self.getlog() def splitlog(self): # 按照空格切分日志 self.splitlogd = re.split(' ', self.logline) #print self.splitlogd return self.splitlogd def getlog(self): if len(self.splitlogd) >= 20: self.logdate['remote_addr'] = self.splitlogd[0] self.logdate['remote_user'] = self.splitlogd[2] self.logdate['time_local'] = time.mktime(time.strptime(self.splitlogd[3], '[%d/%b/%Y:%H:%M:%S')) self.logdate['request_type'] = self.splitlogd[5].replace('\"', '') self.logdate['request'] = self.splitlogd[6] self.logdate['status'] = self.splitlogd[8] self.logdate['http_referer'] = MySQLdb.escape_string(self.splitlogd[10].replace('\"', '')) self.logdate['http_x_forwarded_for'] = self.splitlogd[len(self.splitlogd)-1] self.get_http_user_agent() self.get_hostname() def get_http_user_agent(self): agent = re.compile(r'\"(.*?)\"') self.logdate['http_user_agent'] = MySQLdb.escape_string(agent.findall(self.logline)[2]) def get_hostname(self): hostname_r = re.compile(r'http://(.*?)/') ishostname = hostname_r.findall(self.splitlogd[10]) #print ishostname if len(ishostname) >= 1: self.logdate['hostname'] = ishostname[0] else: self.logdate['hostname'] = '-' #--------------------------------insert_log--------------------------------------------- # 生成 csv 数据 ||| 分割 数据 , 主要用于 mysql快速导入 class insert_log(Sublog): def __init__(self, logpath, serverid): Sublog.__init__(self) self.nowtime = time.strftime("%Y_%m_%d",time.localtime(time.time())) self.sunlogfile = "nginxlog_"+self.nowtime+'.csv' self.logpath = logpath self.serverid = serverid self.numbres = 0 #self.Mysql_db_instance = Mysql_db() def createsubfile(self): self.opencsvfile() self.subfile = open(ROOT+'CSV/'+self.sunlogfile, 'a') olog = open(self.logpath) for i in olog: #self.logdate = {} self.logline = i.strip() self.run() if self.logdate: #print self.logdate self.createsql() #self.insertdb() #time.sleep(3) self.numbres = self.numbres + 1 self.subfile.close() return [ROOT+'CSV/'+self.sunlogfile, self.sunlogfile, self.numbres] def insertdb(self): sql = "INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local']) #print sql self.subfile.write(sql + '\n') #self.Mysql_db_instance.insertsql(sql) #self.Mysql_db_instance.commit() def createsql(self): c = "%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local'],self.serverid) self.subfile.write(c + '\n') def opencsvfile(self): if os.path.isdir(ROOT+'CSV'): pass else: os.mkdir(ROOT+'CSV') #--------------------------------function --------------------------------------------- def load_mysql(csvfile): if mysql['mpwd']: os.system("""mysql -h%s -u%s -p%s -e "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], mysql['mpwd'], csvfile, csvfile)) else: os.system("""mysql -h%s -u%s -e "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], csvfile, csvfile)) #索引处理 type = 0 删除索引, 1创建索引 def mysqlindex(type): if mysql['mpwd']: ism = "mysql -h%s -u%s -p%s -e " % (mysql['mhost'], mysql['muser'], mysql['mpwd']) else: ism = "mysql -h%s -u%s -e " % (mysql['mhost'], mysql['muser']) if type == 0: os.system(" %s 'alter table nginxlog.nginxlog drop index time_local' " % ism) os.system(" %s 'alter table nginxlog.nginxlog drop index hostname' " % ism) os.system(" %s 'alter table nginxlog.nginxlog drop index remote_addr' " % ism) elif type == 1: os.system(" %s 'alter table nginxlog.nginxlog add index time_local(time_local)' " % ism) os.system(" %s 'alter table nginxlog.nginxlog add index hostname(hostname)' " % ism) os.system(" %s 'alter table nginxlog.nginxlog add index remote_addr(remote_addr)' " % ism) """ 日志记录函数 """ def write_logs(logconten): logfile_path = Logfile if logfile_path and logconten: log_write = open(logfile_path, 'a') log_write.write(logconten+'\n') log_write.close() # #--------------------------------运行--------------------------------------------- def run(): list = os.listdir(Sourcedatadir) print 'start.......' write_logs('-----------%s start.....----%s-----------------' % ( time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), str(list)) ) for line in list: if os.path.isfile(Sourcedatadir+line): s = re.split('_', line) if re.match(r'^\d.*$', s[0]): write_logs('time(%s) logfile(%s) start ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line)) insert_log_instance = insert_log(Sourcedatadir+line, s[0]) gfiledata = insert_log_instance.createsubfile() load_mysql(gfiledata[0]) os.remove(Sourcedatadir+line) write_logs('time(%s) logfile(%s) log numbers(%s) stop ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line, str(gfiledata[2])) ) print Sourcedatadir+line+ ' OK ....... ' #-------------------------------- exece --------------------------------------------- if __name__ == "__main__": #run_inotify(Sourcedatadir, run) run()