一、头文件
基本模块导入
# stdlib
import csv
import hashlib
import http.client
import json
import os
import random
import re
from urllib import parse

# third-party
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
机器学习模块:
from sklearn.cross_validation import train_test_split #分数据
import mglearn
from sklearn.linear_model import Ridge
from sklearn import datasets, linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from mglearn import plot_2d_separator
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
深度学习模块:
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPooling2D
from keras.models import load_model
读取文件方法,好多包都可读取文件:
# Example CSV path (kept as a note — as a bare line it was a syntax error):
# C:/pypractise/fangzhen2/cj.csv
# Read a whole text file; a context manager guarantees the handle is closed
# (the original left it open).
with open('test_txt/14.txt', 'r') as text:
    q = text.read()
# Read a CSV file into a pandas DataFrame.
data = pd.read_csv('C:/pypractise/3/Advertising.csv')
读取文件列表,以便在依次遍历文件
def get_imglist(path, ext='.jpg'):
    """Return full paths of files in *path* whose names end with *ext*.

    Args:
        path: directory to scan (non-recursive).
        ext: filename suffix to keep; defaults to '.jpg', matching the
            original hard-coded behavior (backward compatible).

    Returns:
        list[str]: ``os.path.join(path, name)`` entries, in the order
        ``os.listdir`` yields them.
    """
    return [os.path.join(path, f) for f in os.listdir(path) if f.endswith(ext)]
写入 xlsx(xlsxwriter),大文件可用
def data_w(datas):
    """Write two sequences into column 0 of ``demo1.xlsx``.

    ``datas[0]`` fills the even rows (0, 2, 4, ...) and ``datas[1]`` the
    odd rows (1, 3, 5, ...), interleaving the two series in one column.
    Uses xlsxwriter, which copes well with large files.
    """
    workbook = xlsxwriter.Workbook('demo1.xlsx')  # create the .xlsx file
    sheet = workbook.add_worksheet(u'sheet1')
    # first series on the even rows
    for row, value in enumerate(datas[0]):
        sheet.write(2 * row, 0, value)
    # second series on the odd rows
    for row, value in enumerate(datas[1]):
        sheet.write(2 * row + 1, 0, value)
    workbook.close()
将数据写入 excel 表格中(xlwt):
def data_write(datas):
    """Write ``datas`` one value per row into column 0 of ``ex.xls``.

    Uses xlwt (legacy .xls format); the sheet name reuses the file name,
    as in the original snippet.
    NOTE(review): a second ``data_write`` defined later in this file
    shadows this one when the module is executed top to bottom.
    """
    book = xlwt.Workbook()
    sheet = book.add_sheet(u'ex.xls', cell_overwrite_ok=True)
    # row i, column 0 for each successive value
    for row, value in enumerate(datas):
        sheet.write(row, 0, value)
    book.save('ex.xls')  # persist the workbook
将 txt 文件的内容写到 csv 中:
def data_write(txt_path='che.txt', csv_path='shuju.csv', sep=' '):
    """Convert a delimited text file into a CSV file.

    Each line of *txt_path* is split on *sep* and written as one CSV row.
    (The original comment claimed the fields were separated by ``@@@``,
    but the code splits on a single space — *sep* makes that explicit.)

    Args:
        txt_path: input text file (UTF-8), one record per line.
        csv_path: output CSV file; overwritten if it exists.
        sep: field delimiter in the input; defaults to a single space,
            matching the original hard-coded behavior.
    """
    with open(csv_path, 'w+', newline='') as csvfile:
        writer = csv.writer(csvfile, dialect='excel')
        with open(txt_path, 'r', encoding='utf-8') as filein:
            for line in filein:
                writer.writerow(line.strip('\n').split(sep))
正则语句切分. 文献去头尾;
#(\.(?=\s+(?:[A-Z])))|(\). )|(\.[ 0-9|A-Z])|(ACKNOWLEDGMENTS.*$)|(.*AUTHOR INFORMATION*$)|(REFERENCES.*$)|(References.*$)|(\?+)|(^.*ABSTRACT:)|(^.*INTRODUCTION )| (^.*CONSPECTUS:)
#正则规则去除开头结尾和*?
# 切分 \.(?=[\d+](?=\s+(?:[A-Z])))|\.(?=\s+(?:[A-Z])))
# 删除开头结尾 (ACKNOWLEDGMENTS.*$)|(AUTHOR.INFORMATION.*$)|(REFERENCES.*$)|(References.*$)|(^.*ABSTRACT:)|(^.*INTRODUCTION )| (^.*CONSPECTUS:)|(ASSOCIATED.CONTENT.*$)
#替换 ?和* (\?+)|(\*+)
# Strip matches of the cleanup pattern from the text.  re.sub returns a
# NEW string — the original discarded the result, making the call a
# no-op — so capture it back into q.
q = re.sub(pattern1, '', q)  # pattern1: one of the regexes noted above
# Pre-compile the single-space pattern so it can be reused later.
space_pattern = re.compile(r' ')
调用API翻译 百度:
ID自己的号
def fanyi(yuju):
    """Translate text (en -> zh) via the Baidu Fanyi field-translate API.

    Args:
        yuju: source text (str) to translate.

    Returns:
        dict | None: the decoded JSON response — ``html["trans_result"][0]``
        holds ``src``/``dst`` — or ``None`` when the request fails.
        (The original could raise ``NameError`` at the final ``return``
        because ``html`` was never initialized before the ``try``.)

    NOTE(review): ``appid`` and ``secretKey`` are blank placeholders and
    must be filled with real credentials before this works.
    """
    api_path = '/api/trans/vip/fieldtranslate'  # endpoint path
    appid = ' '
    secretKey = ' '
    q = yuju
    salt = str(random.randint(32768, 65536))
    domain = 'medicine'
    # Signature = MD5(appid + q + salt + domain + secret) per the API rules.
    # (The original's bare ``q.encode(...)`` discarded its result — removed.)
    m1 = hashlib.md5()
    m1.update((appid + q + salt + domain + secretKey).encode(encoding='utf-8'))
    sign = m1.hexdigest()
    fromLang = 'en'
    toLang = 'zh'
    myurl = (api_path + '?q=' + parse.quote(q) + '&from=' + fromLang +
             '&to=' + toLang + '&appid=' + appid + '&salt=' + salt +
             '&domain=' + 'medicine' + '&sign=' + sign)
    httpClient = None
    html = None  # fix: defined even when the request below fails
    try:
        httpClient = http.client.HTTPConnection('api.fanyi.baidu.com')  # API host
        httpClient.request('GET', myurl)
        response = httpClient.getresponse()
        html = json.loads(response.read().decode('utf-8'))  # decoded result
        # ds1t = html["trans_result"][0]["dst"]
        # src1 = html["trans_result"][0]["src"]
    except Exception as e:
        print(e)
    finally:
        if httpClient:
            httpClient.close()
    return html
画图方法:
一般的画出来:
# Plot predicted vs. test values on one figure.
# Fix: create the figure FIRST — the original called plt.figure() after
# setting the title and axis limits, which discarded those settings by
# starting a fresh, empty figure.
plt.figure()
plt.title("ATP curve")                  # chart title
plt.xlim(-250, 2500)                    # x-axis range (positional args work on all versions)
plt.ylim(0, 220)                        # y-axis range
plt.plot(range(len(y_pred)), y_pred, 'b', label='predict')
plt.plot(range(len(y_pred)), y_test, 'r', label='test')
plt.legend(loc="upper right")           # legend position
plt.xlabel("the number of sales")
plt.ylabel("value of sales")
plt.show()