QQ坦白说代码信息获取过滤代码讲解

本文前传:https://blog.csdn.net/gengli2017/article/details/81940707
要根据QQ坦白说提取信息,可以通过爬虫获取好友信息,下面对获取信息过程进行讲解
0. 本文用的是Python3.7,selenium3.14.0
1. selenium是什么?selenium是一个python库,可以很方便抓取网页,此处用来自动登录QQ和抓取网页
2. gtk和g_qzonetoken是什么?可以简单理解为定位好友信息的网址的一部分,就用来定位好友信息
3. gtk和g_qzonetoken怎样获取?gtk通过浏览器cookie计算得到,g_qzonetoken可以通过解析网页源码获取。浏览器cookie就是浏览器保存的登录信息。更详细的见:https://blog.csdn.net/vision_tung/article/details/78888639 ,https://blog.csdn.net/Vision_Tung/article/details/78888695。
4. gtk和g_qzonetoken每次登录都会不同
5. 我的好友列表网址(myQQ是你的QQ号,其余变量对应替换):'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_hat_get.cgi?hat_seed=1&uin=' + str(myQQ) + '&fupdate=1&g_tk=' + str(gtk) + '&qzonetoken=' + str(g_qzonetoken) + '&g_tk=' + str(gtk)
6. 好友信息网址(friendQQ是好友的QQ号):'https://mobile.qzone.qq.com/profile_get?qzonetoken=' + str(g_qzonetoken) + '&g_tk=' + str(gtk) + '&format=json&hostuin=' + str(friendQQ)
7. 自己和好友认识天数信息网址:'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/friendship/cgi_friendship?activeuin=' + str(myQQ) + '&passiveuin=' + str(friendQQ) + '&situation=1&isCalendar=1&g_tk=' + str(gtk) + '&qzonetoken=' + str(g_qzonetoken) + '&g_tk=' + str(gtk)
返回内容里只提供开始认识的Unix时间戳 "addFriendTime" 和当前系统时间戳 "systemTime"(例如 "systemTime":1534816754),两者之差就是认识的时长,如何换算成年、月、日见下面的代码。

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import re 
import datetime

def frankSpeak(account, password) :
    """Log in to Qzone, scrape every friend's profile data and print it.

    For each friend returned by ``getFriends`` this collects the profile
    fields from ``getFriendInfo`` and the acquaintance duration from
    ``getKnownTime``, then prints one tuple per friend in the form
    (QQ number, remark, age, gender, constellation, known time, province, city).
    Gender is encoded as 0 = female, 1 = male, 2 = unspecified.

    Args:
        account: QQ number used to log in; also used as the owner's QQ
            number in the request URLs.
        password: Password for ``account``.
    """
    myQQ = account
    driver = loginQQ(account, password)
    friendInfoVector = []
    try:
        gtk, g_qzonetoken = getGtk_Token(driver)
        friendsDict = getFriends(driver, myQQ, gtk, g_qzonetoken)
        totalFriends = len(friendsDict)

        for fetched, friendQQ in enumerate(friendsDict, start=1):
            age, city, constellation, gender, province = getFriendInfo(driver, friendQQ, gtk, g_qzonetoken)
            knownTime = getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken)
            friendInfoVector.append(
                (friendQQ, friendsDict[friendQQ], age, gender, constellation, knownTime, province, city)
            )

            # Frequent requests get the account locked for about two hours,
            # so after every 5 friends pause for two minutes and log in again.
            # The pause is skipped after the last friend because nothing is
            # left to fetch.
            if fetched % 5 == 0 and fetched < totalFriends:
                # Close the old browser first; otherwise every re-login
                # leaves another Chrome window/process behind.
                driver.quit()
                driver = None
                time.sleep(120)
                driver = loginQQ(account, password)
                gtk, g_qzonetoken = getGtk_Token(driver)
    finally:
        # Always release the browser, even when scraping fails midway.
        if driver is not None:
            driver.quit()

    print('所有好友信息')
    print('(QQ号, 备注, 年龄, 性别, 星座, 认识时间, 省份, 城市)')
    print('女:性别=0   男:性别=1   未注明:性别=2')
    for friendInfo in friendInfoVector:
        print(friendInfo)

 #  print(gtk)
 #  print(g_qzonetoken)
 #  print(friendsDick)


# Log in to QQ and return the browser showing the Qzone page.
def loginQQ(account, password) :
    """Open Chrome, log in to Qzone with account/password and return the driver.

    Args:
        account: QQ number typed into the login form.
        password: Password typed into the login form.

    Returns:
        The Selenium Chrome driver after the login button has been clicked.
        The caller owns it and is responsible for calling ``quit()``.
    """
    chrome_options = Options()
    chrome_options.add_argument("--disable-infobars")
    # Pass the options to Chrome; building them without passing them has no effect.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get('https://qzone.qq.com/')
        # The login form lives inside an iframe.
        driver.switch_to.frame('login_frame')
        # Switch from QR-code login to account/password login.
        driver.find_element_by_id('switcher_plogin').click()
        driver.find_element_by_id('u').clear()
        driver.find_element_by_id('u').send_keys(account)
        driver.find_element_by_id('p').clear()
        driver.find_element_by_id('p').send_keys(password)
        driver.find_element_by_id('login_button').click()
        # Give the page a moment to finish logging in before it is used.
        time.sleep(2)
    except Exception:
        # Do not leave an orphaned browser behind when a login step fails.
        driver.quit()
        raise
    return driver


# Derive the g_tk value from the login cookies.
def getGTKFromCookie(cookie):
    """Return the g_tk hash computed from the ``p_skey`` cookie value.

    Args:
        cookie: Mapping of cookie name to value; must contain ``'p_skey'``.

    Returns:
        The hash as a non-negative int limited to 31 bits.

    Raises:
        KeyError: If ``cookie`` has no ``'p_skey'`` entry.
    """
    acc = 5381
    for ch in cookie['p_skey']:
        # acc + (acc << 5) is the same as acc * 33.
        acc = acc * 33 + ord(ch)
    # Keep only the low 31 bits.
    return acc % 0x80000000


# Obtain gtk and g_qzonetoken from the logged-in browser session.
def getGtk_Token(driver) :
    """Return ``(gtk, g_qzonetoken)`` for the current browser session.

    gtk is computed from the browser cookies by ``getGTKFromCookie``;
    g_qzonetoken is extracted from the source of the page currently loaded
    in ``driver``.

    Args:
        driver: Selenium driver that has already logged in.

    Returns:
        Tuple ``(gtk, g_qzonetoken)``.

    Raises:
        ValueError: If the page source does not contain a quoted
            g_qzonetoken value (for example when login has not completed).
    """
    # Flatten Selenium's list of cookie dicts into name -> value.
    cookie = {elem['name']: elem['value'] for elem in driver.get_cookies()}
    gtk = getGTKFromCookie(cookie)

    html = driver.page_source
    # Raw string so the regex escapes are not read as string escapes.
    match = re.search(
        r'window\.g_qzonetoken = \(function\(\)\{ try\{return (.*?);\} catch\(e\)',
        html,
    )
    if match is None:
        raise ValueError('g_qzonetoken not found in page source')

    # The captured expression is a quoted string; take the text between the
    # first pair of double quotes.
    pieces = match.group(1).split('"')
    if len(pieces) < 2:
        raise ValueError('g_qzonetoken value is not a quoted string')
    return gtk, pieces[1]


# Fetch the friend list.
def getFriends(driver, myQQ, gtk, g_qzonetoken) :
    """Load the friend-list page and return a dict of QQ number -> real name.

    Args:
        driver: Logged-in Selenium driver used to request the page.
        myQQ: QQ number of the logged-in account.
        gtk: g_tk value for this session.
        g_qzonetoken: qzonetoken value for this session.

    Returns:
        Dict mapping each friend's QQ number (str) to the ``realname`` field
        (str) found in the response. Empty when nothing matches.
    """
    friendUrl = 'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_hat_get.cgi?hat_seed=1&uin=' + str(myQQ) +'&fupdate=1&g_tk='+str(gtk)+'&qzonetoken='+str(g_qzonetoken)+'&g_tk='+str(gtk)

    driver.get(friendUrl)
    friend_list = str(driver.page_source)

    # Raw string so \d, \{ and \n reach the regex engine unchanged. Each
    # entry looks like  "<qq>":{<newline>"realname":"<name>"}
    pattern = re.compile(r'"(.\d*)":\{\n"realname":"(.*?)"}', re.S)
    return {str(qq): str(name) for qq, name in pattern.findall(friend_list)}


# Fetch a friend's profile fields by the friend's QQ number.
def getFriendInfo(driver, friendQQ, gtk, g_qzonetoken) :
    """Return ``(age, city, constellation, gender, province)`` for a friend.

    All five values are strings taken from the profile page. When the page
    does not contain the expected fields, the placeholder tuple
    ``('-1', 'NULL', 'NULL', '2', 'NULL')`` is returned instead.
    Gender encoding: 1 = male, 0 = female, anything else = 2.

    Args:
        driver: Logged-in Selenium driver used to request the page.
        friendQQ: QQ number of the friend to look up.
        gtk: g_tk value for this session.
        g_qzonetoken: qzonetoken value for this session.
    """
    profile_url = ''.join((
        'https://mobile.qzone.qq.com/profile_get?qzonetoken=',
        str(g_qzonetoken),
        '&g_tk=',
        str(gtk),
        '&format=json&hostuin=',
        str(friendQQ),
    ))
    driver.get(profile_url)
    page_text = str(driver.page_source)

    # The fields are matched in the order they appear in the response.
    found = re.search(
        r'"age":(\d*).*"city":"(\w*)".*"constellation":"(\w*).*"gender":(-?\d*).*"province":"(\w*)"',
        page_text,
    )
    if found is None:
        return ('-1','NULL','NULL','2','NULL')
    return found.groups()


# Work out how long the account owner and a friend have known each other.
def getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken) :
    """Return how long two accounts have been friends, as a short string.

    The friendship page provides ``addFriendTime`` (when the friendship
    started) and ``systemTime`` (current server time) as Unix timestamps.
    Both are converted to local calendar dates and compared using the
    largest unit that differs:

    * different years  -> ``'<years + 1>年'``
    * same year, later month -> ``'<months + 1>月'``
    * otherwise -> ``'<days + 1>日'``

    Args:
        driver: Logged-in Selenium driver used to request the page.
        myQQ: QQ number of the logged-in account.
        friendQQ: QQ number of the friend.
        gtk: g_tk value for this session.
        g_qzonetoken: qzonetoken value for this session.

    Returns:
        The duration string, or ``'NULL'`` when either timestamp is missing
        from the page, in line with the ``'NULL'`` placeholders that
        ``getFriendInfo`` returns for missing data.
    """
    knownDaysUrl = 'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/friendship/cgi_friendship?activeuin=' + str(myQQ) +'&passiveuin=' + str(friendQQ) +'&situation=1&isCalendar=1&g_tk='+str(gtk) + '&qzonetoken='+str(g_qzonetoken)+'&g_tk='+str(gtk)

    driver.get(knownDaysUrl)
    knownDaysInfo = str(driver.page_source)

    beginMatch = re.search(r'"addFriendTime":(\d+)', knownDaysInfo)
    lastMatch = re.search(r'"systemTime":(\d+)', knownDaysInfo)
    if beginMatch is None or lastMatch is None:
        # No friendship data on the page; do not abort the whole run.
        return 'NULL'

    beginTime = datetime.date.fromtimestamp(int(beginMatch.group(1)))
    lastTime = datetime.date.fromtimestamp(int(lastMatch.group(1)))

    if lastTime.year > beginTime.year:
        return str(lastTime.year - beginTime.year + 1) + '年'
    # Compare against the start month; comparing lastM with itself made this
    # branch unreachable.
    if lastTime.month > beginTime.month:
        return str(lastTime.month - beginTime.month + 1) + '月'
    return str(lastTime.day - beginTime.day + 1) + '日'



# Run the program: replace 'QQNumber' with the QQ account to log in with and
# 'password' with that account's password.
frankSpeak('QQNumber', 'password')

你可能感兴趣的:(知识分享)