Python Learning Diary 1
I first came across Python quite a while ago, but I only ever picked up a little of the basics, so I'm using this long, boring holiday to stand on the shoulders of giants and learn properly (in other words, to study and copy the source code of people far better than me).
I'll analyse every piece of source code I use, and I'll post each day's learning diary on CSDN so I can look back on it later and hopefully pass a little experience on to anyone reading.
Whenever I've learned from someone's blog, I'll link it next to the relevant part of my article; I can't forget the people I learned from, and I'm grateful to so many strangers for showing me the way. Thank you.
First, what I did today:
Databases are great!
Setting up a database & basic operations
The installation tutorials are the first and second links below; here I'll paste the cmd commands I use most often (a small Python connection check follows the links).
MongoDB and its data directory are both installed on my D drive:
Start the database service
In D:\data\db, run D:\Data_Mongodb\bin\mongod.exe
mongod --dbpath=D:\Data
mongo localhost
net start MongoDB
Show my database contents
show dbs
show collections
Stop the database service
net stop MongoDB
Installing and configuring MongoDB
Installing and configuring MongoDB
Remote backup and restore for MongoDB
Basic MongoDB operations — 菜鸟教程 (Runoob)
MongoDB queries
Database visualisation — download the client
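As promised above, here is the small connection check I added myself (it's not from any of the linked posts) to confirm the service started by net start MongoDB is reachable from Python; the timeout value is just an arbitrary choice of mine.

import pymongo
from pymongo.errors import ServerSelectionTimeoutError

# Connect to the local service started with the commands above.
client = pymongo.MongoClient(host='localhost', port=27017, serverSelectionTimeoutMS=2000)
try:
    client.admin.command('ping')           # raises if the server cannot be reached
    print(client.list_database_names())    # same information as `show dbs`
except ServerSelectionTimeoutError as e:
    print('MongoDB is not reachable:', e)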
Reading data from the database into a list
import pymongo
import pandas as pd

def get_href(PIds):
    # Read the whole jd_A6000 collection, keep only the "编号" (product id) column,
    # and append every id to the PIds list.
    collection_read = db_save.jd_A6000
    read_data = pd.DataFrame(list(collection_read.find()))
    read_data = read_data['编号']
    for data in read_data:
        # print(data)
        PIds.append(data)
    print('Output successfully.')
    return PIds

client = pymongo.MongoClient(host='localhost', port=27017)
db_save = client.JD_Products
PIds = []
get_href(PIds)
print(PIds)
The code above I wrote myself while following other people's examples, with a lot of changes.
Converting CSV data into database contents
# Read an Excel file and insert its rows into the database
import json
import xlrd
from pymongo import MongoClient

# Connect to the database
client = MongoClient('localhost', 27017)
db = client.JD_products
C_account = db.JD_xiaoliang_A6000

data = xlrd.open_workbook('A6000.xls')
table = data.sheets()[0]
# Use the first row of the sheet as the field names to store in MongoDB
rowstag = table.row_values(0)
nrows = table.nrows
# ncols = table.ncols
# print(rowstag)
returnData = {}
for i in range(1, nrows):
    # Zip the field names with each data row into a dict and dump it to JSON ...
    returnData[i] = json.dumps(dict(zip(rowstag, table.row_values(i))))
    # ... then decode it again, which leaves plain Python values for insertion
    returnData[i] = json.loads(returnData[i])
    # print(returnData[i])
    C_account.insert_one(returnData[i])
(Thanks to the author whose code gave me the idea; most of this is their source code, but I honestly cannot find the page any more. If you come across it, please let me know and I'll add the link here.)
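The heading says CSV, but the script above actually reads an .xls file with xlrd, so here is a small sketch I added for the plain-CSV case. The file name A6000.csv and the assumption that its first row holds the field names are mine, not from the original code.

import csv
import pymongo

client = pymongo.MongoClient('localhost', 27017)
collection = client.JD_products.JD_xiaoliang_A6000

# DictReader turns every row into a {field name: value} dict using the header row.
with open('A6000.csv', newline='', encoding='utf-8') as f:
    rows = list(csv.DictReader(f))
if rows:
    collection.insert_many(rows)    # one round trip instead of inserting row by row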
Exporting database contents to CSV
# from_mongoDB_to_csv_commits.py
'''
Read comment records out of MongoDB and write them into a CSV file.
@author 系海hong啊
'''
import csv
import time

import pymongo

def save_every_page(writer, allRecordRes):
    # Write one collection's records to the CSV file, filling missing fields with "None".
    listnum = 0
    fieldList = ['星级', '内容', '购买套餐']
    for record in allRecordRes:
        recordValueList = []
        # print(record)
        time.sleep(0.0000001)
        for field in fieldList:
            if field not in record:
                recordValueList.append("None")
            else:
                recordValueList.append(record[field])
        try:
            writer.writerow(recordValueList)
            listnum += 1
            # print('row written')
        except Exception as e:
            print(f'write csv exception. e = {e}')
    # wb_csvMongo.save('%s.csv' % Data_Base)
    print("Found {} records in total".format(listnum))

'''
def save_csv():
    # wb_csvMongo = xlwt.Workbook()
    # f = wb_csvMongo.add_sheet('JD_Bra')
    return writer
'''

def get_table(collection_read, PIds):
    # Collect every "编号" (product id) stored in the index collection.
    read_data = collection_read.find({}, {"_id": 0, "编号": 1})
    for data in read_data:
        PIds.extend(data.values())
    print('Output successfully.')
    return PIds

def open_MongoDB(Data_Base, Data_Table):
    client = pymongo.MongoClient(host='localhost', port=27017)
    db_des = client[Data_Base]
    collection_read = db_des[Data_Table]
    PIds = []
    # Use a list (not a set) for the fields so the header and data columns stay in the same order.
    fieldList = ['星级', '内容', '购买套餐']
    with open(Data_Base + Data_Table + ".csv", "a+", newline='', encoding='utf-8') as csvfileWriter:
        writer = csv.writer(csvfileWriter)
        writer.writerow(fieldList)
        get_table(collection_read, PIds)
        for pid in PIds:
            # Each product id is also the name of the collection holding its comments.
            db_des_table = db_des[pid]
            allRecordRes = db_des_table.find()
            print('Now writing ' + str(pid))
            save_every_page(writer, allRecordRes)

if __name__ == '__main__':
    Data_Base = 'JD_Products'
    Data_Table = 'JD_xiaoliang_Products'
    open_MongoDB(Data_Base, Data_Table)
    print('Program finished')
I changed a lot here: I rewrote the parts of the original program I couldn't understand and restructured the code into functions, so the modules are easier to drop into other programs and easier to debug.
As for the original source, I went back through a lot of pages from this morning but just can't find where this code came from, so my apologies to the author who inspired it. If you recognise your code, please tell me and I'll add your link here, thank you!
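One thing I noticed while restructuring: writing rows by hand means I have to keep the field order and the "None" filling in sync myself. A small alternative sketch of my own (the output file name here is made up) would be to let csv.DictWriter handle both:

import csv
import pymongo

field_list = ['星级', '内容', '购买套餐']    # a plain list keeps the column order fixed

client = pymongo.MongoClient(host='localhost', port=27017)
collection = client['JD_Products']['JD_xiaoliang_Products']

with open('JD_Products_export.csv', 'w', newline='', encoding='utf-8') as f:
    # extrasaction='ignore' drops unknown keys, restval fills missing ones.
    writer = csv.DictWriter(f, fieldnames=field_list, extrasaction='ignore', restval='None')
    writer.writeheader()
    for record in collection.find({}, {'_id': 0}):
        writer.writerow(record)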
Building my own IP pool (this one is a real treasure!!!). Huge thanks to the author who posted it on the Tencent Cloud community!!!
get_ip.py
#!/bin/env python
# -*- coding:utf-8 -*-
# _author:kaliarch
import random

import requests
from bs4 import BeautifulSoup

class GetProxyIP:
    def __init__(self, page=10):
        self._page = page
        self.url_head = 'http://www.xicidaili.com/wt/'

    def get_ip(self):
        """
        get the raw proxy ip pool
        :return: res_pool list
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"}
        res_pool = []
        for pagenum in range(1, self._page):
            url = self.url_head + str(pagenum)
            response = requests.get(url, headers=headers)
            soup = BeautifulSoup(response.text, "html.parser")
            soup_tr = soup.find_all('tr')
            for item in soup_tr:
                try:
                    soup_td = item.find_all('td')
                    # Build "protocol://ip:port" from the table cells on the page
                    res_pool.append(soup_td[5].text.lower() + '://' + soup_td[1].text + ':' + soup_td[2].text)
                except IndexError:
                    pass
        return res_pool

    def right_proxies(self, res_pool):
        """
        check which ips are usable
        :param res_pool:
        :return: right_pool list
        """
        right_pool = []
        for ip in res_pool:
            # Put https proxies under the 'https' key and the rest under 'http'
            if 'https' in ip:
                proxies = {'https': ip}
            else:
                proxies = {'http': ip}
            check_urllist = ['http://www.baidu.com', 'http://www.taobao.com', 'https://cloud.tencent.com/']
            try:
                response = requests.get(random.choice(check_urllist), proxies=proxies, timeout=1)
                # Keep only the proxies that answer within the timeout
                if response.status_code:
                    right_pool.append(proxies)
                    print('add ip %s' % proxies)
            except Exception:
                continue
        return right_pool

if __name__ == '__main__':
    # Instantiate the class; the number of pages to scrape can be passed in
    proxyhelper = GetProxyIP(2)
    res_pool = proxyhelper.get_ip()
    proxy_ip = proxyhelper.right_proxies(res_pool)
    print(proxy_ip)
save_ip.py
#!/bin/env python
# -*- coding:utf-8 -*-
# _author:kaliarch
import pymongo

import get_ip

class MongodbOper:
    def __init__(self):
        """
        initialization of the MongoDB helper
        :param
        """

    def mongodb_save(self, result_list):
        """
        save data
        :return: None
        """
        for values in result_list:
            collection.insert_one(values)

    def mongodb_gain(self):
        """
        gain data
        :return: proxies
        """
        ip = collection.find_one()
        return ip

if __name__ == '__main__':
    proxyhelper = get_ip.GetProxyIP(2)
    res_pool = proxyhelper.get_ip()
    proxy_ip = proxyhelper.right_proxies(res_pool)
    client = pymongo.MongoClient(host='localhost', port=27017)
    db = client.IP_Pool
    collection = db.IP_geted
    dbhelper = MongodbOper()
    dbhelper.mongodb_save(proxy_ip)
    ip = dbhelper.mongodb_gain()
    print(ip)
The two scripts above need to be used together: just put both files in the same folder and run save_ip.py directly.
I changed very little in these two files, so I left the original author's name in them, out of respect.
Original source of the IP pool
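To actually use the pool in a crawler, this is roughly how I'd pull a saved proxy back out and hand it to requests. This little sketch is my own addition, not part of the original author's code, and the test URL is just an example.

import pymongo
import requests

client = pymongo.MongoClient(host='localhost', port=27017)
# Fetch one proxies dict saved by save_ip.py, e.g. {'http': 'http://1.2.3.4:8080'}
proxies = client.IP_Pool.IP_geted.find_one({}, {'_id': 0})

if proxies:
    try:
        resp = requests.get('http://www.baidu.com', proxies=proxies, timeout=5)
        print(resp.status_code)
    except requests.RequestException as e:
        print('proxy failed:', e)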
Finally, here is something I wrote entirely by myself!!! Finishing it today made me really happy; it's nothing big, but for a beginner like me it feels great.
# get_all_mongo_collections_to_one.py
import time

import pymongo

def to_save(db, data):
    # Append one comment document to the combined JD_P_All collection.
    table_save = db.JD_P_All
    try:
        table_save.insert_one(data)
    except Exception:
        print('Insert The Data Failed')
        time.sleep(0.1)

def get_ids(db):
    # Collect every "编号" (product id) from the index collection.
    ids = []
    table_read = db['JD_xiaoliang_P']
    read_data = table_read.find({}, {"_id": 0, "编号": 1})
    for data in read_data:
        ids.extend(data.values())
    return ids

def read_one_collection(data_Id, db):
    # Copy the fields we care about from one product's comment collection.
    table_read_one = db[data_Id]
    read_data_one = table_read_one.find({}, {"_id": 0, "creationTime": 1, "score": 1, "content": 1, "referenceName": 1, "days": 1})
    for data in read_data_one:
        to_save(db, data)

if __name__ == '__main__':
    client = pymongo.MongoClient('localhost', 27017)
    db = client.JD_Product
    PIds = get_ids(db)
    listnum = 0
    for i in PIds:
        read_one_collection(i, db)
        time.sleep(0.001)
        listnum += 1
        print(listnum)
    print("Program finished")
Thanks for reading! If you repost this, please include a link back. Your support and your progress are my biggest motivation!!