python百度关键词相关搜索采集,链轮查询采集相关关键词工具exe

python百度关键词相关搜索采集,链轮查询采集相关关键词工具exe

1.随机生成请求头(User-Agent)
2.关键词相关筛选
3.关键词去重
4.链轮采集

#百度关键词相关搜索采集
#20191118
# -*- coding: UTF-8 -*-

import requests,re,time
from fake_useragent import UserAgent
from bs4 import BeautifulSoup

# Build request headers carrying a randomly chosen User-Agent
def ua():
    """Return a headers dict whose ``User-Agent`` is ``UserAgent().random``.

    A fresh ``UserAgent`` object is created on every call.
    """
    agent_source = UserAgent()
    return {"User-Agent": agent_source.random}

# Initial filter terms; joined with "|" so they act as regex alternatives
req = "|".join((
    "工业", "产品", "外观", "结构", "造型", "手机", "犀牛", "rhino", "proe",
))


# Screen a piece of text against a regular expression
def search(req,con,n):
    """Return group *n* of the first match of *req* in *con*.

    Args:
        req: Regular-expression pattern passed to ``re.search``.
        con: Text to search.
        n: Group index (or name) handed to ``Match.group``.

    Returns:
        The requested group of the first match, or the string ``'no'``
        when the pattern does not match anywhere in *con*.
    """
    matched = re.search(req, con)
    return matched.group(n) if matched else 'no'

# Fetch the related-search links for one keyword
def get_a(key):
    """Return the ``<a>`` tags found in the ``div#rs`` block for *key*.

    Requests the Baidu result page for *key* using the headers produced by
    ``ua()``, sleeps two seconds after the request, then parses the HTML
    with BeautifulSoup's ``lxml`` parser.

    Args:
        key: Keyword to search for. It is sent through ``params`` so that
            characters such as ``&``, ``#`` or spaces are URL-encoded
            instead of corrupting the query string.

    Returns:
        The list of ``<a>`` tags inside the element ``<div id="rs">``, or
        an empty list when the page contains no such element.

    Raises:
        Any exception raised by ``requests.get`` (timeouts, connection
        errors) propagates to the caller.
    """
    response = requests.get(
        "https://www.baidu.com/s",
        params={"ie": "utf-8", "tn": "baidu", "wd": key},
        headers=ua(),
        timeout=5,
    ).text
    time.sleep(2)
    soup = BeautifulSoup(response, 'lxml')
    related = soup.find('div', id='rs')
    # find() returns None when the block is missing; calling find_all on it
    # would raise AttributeError, so report "no links" instead.
    if related is None:
        return []
    return related.find_all('a')


# Chained ("link wheel") collection of related keywords
def get_keywords(keywords):
    """Collect related keywords breadth-first, starting from *keywords*.

    Every keyword is looked up once with ``get_a``. Each returned link text
    that passes the ``req`` filter (via ``search``) and has not been seen
    before is recorded and queued for its own lookup, so the crawl keeps
    expanding until no new keyword turns up.

    Unlike a self-recursive crawl, the work queue is explicit: the function
    terminates when the queue is empty, never queries the same keyword
    twice, and leaves the caller's list untouched.

    Args:
        keywords: Iterable of seed keywords.

    Returns:
        List of newly discovered keywords (seeds excluded), in discovery
        order. Empty when *keywords* is empty or nothing new is found.
    """
    from collections import deque

    known = list(keywords)      # seeds plus everything accepted so far
    seen = set(known)           # O(1) duplicate check
    pending = deque(known)      # keywords still waiting to be queried
    all_keywords = []

    while pending:
        key = pending.popleft()
        print(">>>开始查询 %s 相关关键词!" % key)
        try:
            div = get_a(key)
        except Exception as first_error:
            print(f'错误代码:{first_error}')
            print('正在重新获取网页内容...')
            time.sleep(5)
            try:
                div = get_a(key)
            except Exception as retry_error:
                # One retry only; skip this keyword rather than abort the
                # whole crawl.
                print(f'错误代码:{retry_error}')
                continue

        for a in div:
            keyword = a.get_text()
            print(keyword)

            # Filter: keep only keywords containing one of the req terms.
            if search(r'(%s)' % req, keyword, 1) == 'no':
                print(f'-剔除关键词 {keyword}')
                continue
            print(f'>>获取关键词 {keyword}')

            print(known)
            # De-duplicate, then queue the new keyword for its own lookup.
            if keyword not in seen:
                seen.add(keyword)
                known.append(keyword)
                all_keywords.append(keyword)
                pending.append(keyword)

    return all_keywords


if __name__ == '__main__':
    # Start the chained crawl from a single seed keyword.
    seed_keywords = ["工业设计培训"]
    get_keywords(seed_keywords)

代码参考来源:流量贩子 《seo应用编程》
python百度关键词相关搜索采集,链轮查询采集相关关键词工具exe_第1张图片

版本二

百度相关搜索关键词抓取
1.读取txt文档关键词
2.导出txt关键词
3.多线程采集关键词

#百度相关搜索关键词抓取,读取txt关键词,导出txt关键词

# -*- coding=utf-8 -*-
import requests
import re
import time
from multiprocessing.dummy import Pool as ThreadPool


# Baidu related-search lookup
def xgss(url):
    """Fetch *url*, extract related-search keywords and append them to a file.

    Args:
        url: Full Baidu search URL for one keyword.

    Returns:
        The extracted keywords as a single string, one keyword per line
        (each followed by ``'\\n'``); empty when nothing was matched. The
        same text is appended to ``gjcsj.txt`` in the working directory.

    Raises:
        Any exception raised by ``requests.get`` or by opening the output
        file propagates to the caller.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
    }
    html = requests.get(url, headers=headers, timeout=5).text

    # NOTE(review): the published listing lost the HTML tags inside both
    # patterns (only the heading text and the capture groups survived).
    # They are reconstructed here from what the code still shows: the
    # heading text, a captured section, and link tuples whose second item
    # (x[1]) is printed as the keyword. Confirm against the live markup.
    ze = r'相关搜索(.+?)</div>'
    xgze = r'<a[^>]*href="(.+?)"[^>]*>(.+?)</a>'

    found = []
    # Search each captured section directly rather than the str() of the
    # whole result list, which would add repr escaping to the text.
    for section in re.findall(ze, html, re.S):
        for x in re.findall(xgze, section, re.S):
            print(x[1])
            found.append(x[1])
    gjc = ''.join(word + '\n' for word in found)

    # Export the keywords to a text file (relative path; the original
    # ".\gjcsj.txt" relied on the invalid escape sequence "\g").
    with open("gjcsj.txt", 'a', encoding='utf-8') as f:
        f.write(gjc)
    print("-----------------------------------")
    return gjc


def main():
    """Read keywords from ``gjc.txt`` and collect their related searches.

    Each non-empty line of ``gjc.txt`` becomes one Baidu search URL; the
    URLs are processed by ``xgss`` on a thread pool.
    """
    from urllib.parse import quote

    print("程序运行,正在导入关键词列表!!!")
    print("-----------------------------------")

    # Import the keyword list; strip the trailing newline of every line so
    # it does not end up inside the URL, and skip blank lines.
    with open("gjc.txt", "r", encoding='utf-8') as source:
        data = [line.strip() for line in source if line.strip()]
    print("导入关键词列表成功!")
    print("-----------------------------------")

    # Turn every keyword into a search URL.
    urls = ['https://www.baidu.com/s?wd=' + quote(keyword) for keyword in data]

    print("采集百度相关搜索关键词开启!")
    print("...................")

    # Collect the related keywords on a thread pool.
    try:
        # Without an argument the pool size defaults to the CPU core count.
        pool = ThreadPool()
        try:
            pool.map(xgss, urls)
        finally:
            pool.close()
            pool.join()
        print("采集百度相关搜索关键词完成,已保存于gjcsj.txt!")
    except Exception as error:
        print("Error: unable to start thread")
        # Show the real cause; the message above is printed for any failure.
        print(error)

    print("8s后程序自动关闭!!!")
    time.sleep(8)


if __name__ == '__main__':
    main()

python百度关键词相关搜索采集,链轮查询采集相关关键词工具exe_第2张图片
exe下载地址:
链接: https://pan.baidu.com/s/1RhmZ99dYCSIJsEe-SnlhXQ
提取码: 9sjs

你可能感兴趣的:(python与seo,百度相关关键词,python,关键词采集,seo,关键词)