python日志处理

第一次用 Python 处理日志,感受到了它与 C 的差距。

日志如下:

"101.231.45.186" 0.028 "/Api/LiveMsg/jx_nums&md=fea755cefa90ca8161e121aa7c971dd2"
"203.195.214.218" 0.455 "/api/liver_msg.php?act=liver_center&source=pc&md=fea755cefa90ca8161e121aa7c971dd2"
"117.29.47.153" 0.411 "/api/liver_msg.php?act=liver_center&source=pc&md=fea755cefa90ca8161e121aa7c971dd2"
"223.88.45.218" 0.028 "/Api/LiveMsg/jx_nums&md=fea755cefa90ca8161e121aa7c971dd2"
"116.9.32.56" 0.030 "/Api/LiveMsg/jx_nums&md=fea755cefa90ca8161e121aa7c971dd2"
"66.249.79.169" 0.023 "/live/msg_detail.php?id=20&oid=1051868"
"118.112.56.111" 0.043 "/Quote/get_hot_stock_list?number=5&order=desc&md=fea755cefa90ca8161e121aa7c971dd2&_=1561012072995"
"60.216.38.218" 0.000 "/favicon.ico"
"180.159.193.162" 0.000 "/favicon.ico"
...
...
...

要求:只保留 .php 请求;URL 中带有非空 act 参数时截取到 act 的值为止,没有 act(或 act 为空)时截取到 "?" 为止;然后对截取后相同的请求计算平均耗时。

例如:【/api/liver_msg.php?act=liver_center】【/live/msg_detail.php?】

python代码:

# Module-level working state shared by the rest of the script.
List = []   # request URLs
List1 = []  # request times, parallel to List
dictt = {}  # result storage: URL -> average time
List4 = []  # indices of the lines belonging to the URL being processed


def func(a, b):
    """Store the mean of the selected times in ``dictt`` under ``List[0]``.

    a: sequence of time values accepted by ``float`` (strings in this script).
    b: sequence of indices into ``a`` that should be averaged.

    Raises ZeroDivisionError when ``b`` is empty and IndexError when ``List``
    is empty.
    """
    total = 0.0
    for idx in b:
        # The times were read as text, so convert each one before adding.
        total += float(a[idx])
    count = len(b)
    dictt[List[0]] = total / count

def _request_key(request):
    """Return the grouping key for one request URL, or None if it is not PHP.

    The key is ``<path>?act=<value>`` when the query string carries a
    non-empty ``act`` parameter, and ``<path>?`` otherwise (also when the URL
    has no query string at all, so both forms land in the same group).
    """
    if ".php" not in request:
        return None
    path, _, query = request.partition("?")
    for param in query.split("&"):
        name, _, value = param.partition("=")
        # Match the parameter name exactly: a substring test would also fire
        # on paths such as "contact.php" or parameters such as "react=".
        if name == "act" and value:
            return path + "?act=" + value
    return path + "?"


# Each log line looks like: "<ip>" <seconds> "<request url>"
with open("url11.txt", "r") as log_file:
    for line in log_file:
        fields = line.split(None, 2)
        if len(fields) != 3:
            continue  # blank, truncated or placeholder line
        seconds = fields[1]
        try:
            float(seconds)
        except ValueError:
            continue  # time column is not numeric; averaging it would fail
        key = _request_key(fields[2].strip().strip('"'))
        if key is None:
            continue
        List.append(key)      # url
        List1.append(seconds)  # time, kept as text and converted when averaged

# Group the parsed lines by URL and average each group's time in one pass.
# Pairing every URL with its own time by position avoids removing times by
# value, which can delete an equal time that belongs to a different URL and
# shift the remaining times onto the wrong requests.
running = {}  # url -> [sum of times, number of lines]
for request, seconds in zip(List, List1):
    bucket = running.setdefault(request, [0.0, 0])
    bucket[0] += float(seconds)
    bucket[1] += 1

# dict preserves insertion order, so results appear in first-seen URL order.
for request, (total, count) in running.items():
    dictt[request] = total / count

# Leave the working lists empty once every line has been accounted for.
del List[:]
del List1[:]
List4 = []

print(dictt, end="")

结果:

D:\py\venv\Scripts\python.exe D:/py/my.py
{'/api/liver_msg.php?act=liver_center': 0.433, '/search/query.php?act=idx_ifr_v1_new': 0.03300000000000001, '/live/msg_detail.php?': 0.023, '/index.php?': 0.11940000000000002, '/liver/liver_release_message.php?act=timing_set': 0.134, '/live/detail_new.php?': 0.159, '/api/user.php?act=get_info2': 0.114}
Process finished with exit code 0

C 代码 + Python 代码(先用 C 过滤并截取 URL,再用 Python 去重求均值):

/*
1.awk '{print $2,$3}' url.txt > url23.txt 【取出2,3列】-时间和url
2.过滤杂项,生成url_change.txt 【去除杂项】
3.test.py 【去重求均值】
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return the length of the grouping key at the start of `url`.
 *
 * The key runs through the value of the first non-empty "act" query
 * parameter, or through the '?' when there is no such parameter.  When the
 * URL has no '?' at all, *has_query is set to 0 and the length covers the URL
 * up to its closing quote / line end, so the caller can append the '?'.
 */
static size_t key_length(const char *url, int *has_query)
{
	const char *query = strchr(url, '?');
	const char *param;

	*has_query = (query != NULL);
	if (query == NULL)
		return strcspn(url, "\"\r\n");

	param = query + 1;
	for (;;) {
		/* One parameter ends at '&', the closing quote or the line end. */
		size_t span = strcspn(param, "&\"\r\n");

		/* Exact name match with a non-empty value ("act=" alone is ignored). */
		if (span > 4 && strncmp(param, "act=", 4) == 0)
			return (size_t)(param - url) + span;
		if (param[span] != '&')
			break;
		param += span + 1;
	}
	return (size_t)(query - url) + 1;
}

/*
 * Read "<seconds> "<url>"" lines from url23.txt, copy the lines that mention
 * ".php" to url_new.txt, and write "<seconds>  <key>" (two spaces) for each
 * of them to url_change.txt.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when one of the files cannot be
 * opened.
 */
int main(int argc ,char *argv[]){
	char line[4096];
	FILE *src = NULL;
	FILE *filtered = NULL;
	FILE *out = NULL;
	int status = EXIT_SUCCESS;

	(void)argc;
	(void)argv;

	src = fopen("url23.txt","rb");         /* original text */
	filtered = fopen("url_new.txt","wb");  /* lines that contain ".php" */
	out = fopen("url_change.txt","wb");    /* normalised "<seconds>  <key>" records */
	if(src == NULL || filtered == NULL || out == NULL){
		perror("fopen");
		status = EXIT_FAILURE;
		goto done;
	}

	while(fgets(line,sizeof line,src) != NULL){
		const char *gap;
		const char *url;
		size_t key_len;
		int has_query;

		if(strstr(line,".php") == NULL){
			continue;
		}
		fputs(line,filtered);

		/* Locate the time/URL boundary instead of assuming a 5-char time. */
		gap = strchr(line,' ');
		if(gap == NULL){
			continue;  /* no separator: not a "<seconds> <url>" record */
		}
		url = gap + 1;
		if(*url == '"'){
			url++;     /* drop the opening quote */
		}

		key_len = key_length(url,&has_query);
		/* Two spaces separate the columns in the output record. */
		fprintf(out,"%.*s  %.*s%s\n",
			(int)(gap - line),line,
			(int)key_len,url,
			has_query ? "" : "?");
	}

done:
	if(out != NULL){
		fclose(out);
	}
	if(filtered != NULL){
		fclose(filtered);
	}
	if(src != NULL){
		fclose(src);
	}
	return status;
}
# Shared state for the averaging step that follows.
List = []   # request URLs
List1 = []  # request times, parallel to List
dictt = {}  # result storage: URL -> average time
List4 = []  # indices of the lines belonging to the URL being processed


def func(List1, List4):
    """Average the times picked out by ``List4`` and file them under ``List[0]``.

    List1: sequence of time values accepted by ``float``.
    List4: sequence of indices into ``List1`` to include in the mean.

    The result is written to ``dictt``; the key is the first element of the
    module-level ``List``.  Raises ZeroDivisionError when ``List4`` is empty.
    """
    accumulated = 0.0
    for position in List4:
        accumulated = accumulated + float(List1[position])
    dictt[List[0]] = accumulated / len(List4)

# url_change.txt holds one "<seconds>  <request>" record per line, with two
# spaces between the columns.
with open("url_change.txt", "r") as records:
    for record in records:
        # Strip only the line terminator so a final line without one keeps
        # its last character.
        seconds, separator, request = record.rstrip("\r\n").partition("  ")
        if not separator:
            continue  # no double-space separator: not a record
        List.append(request)
        List1.append(seconds)

# Group the records by URL and average each group's time in one pass.
# Pairing every URL with its own time by position avoids removing times by
# value, which can delete an equal time that belongs to a different URL and
# shift the remaining times onto the wrong requests.
running = {}  # url -> [sum of times, number of records]
for request, seconds in zip(List, List1):
    bucket = running.setdefault(request, [0.0, 0])
    bucket[0] += float(seconds)
    bucket[1] += 1

# dict preserves insertion order, so results appear in first-seen URL order.
for request, (total, count) in running.items():
    dictt[request] = total / count

# Leave the working lists empty once every record has been accounted for.
del List[:]
del List1[:]
List4 = []

print(dictt,end="")

 结果:

D:\py\venv\Scripts\python.exe D:/py/test.py
{'/api/liver_msg.php?act=liver_center': 0.433, '/search/query.php?act=idx_ifr_v1_new': 0.03300000000000001, '/live/msg_detail.php?': 0.023, '/index.php?': 0.11940000000000002, '/liver/liver_release_message.php?act=timing_set': 0.134, '/live/detail_new.php?': 0.159, '/api/user.php?act=get_info2': 0.114}
Process finished with exit code 0

 

 

 

 

 

 

 

 

 

你可能感兴趣的:(Python)