python 数据读写 IO

1   TXT读写

# Write: mode 'w' is write-only, 'r' is read-only.
# `with` closes the handle even if the write/read raises, and the explicit
# encoding keeps the non-ASCII text independent of the platform default.
with open('packages/txt_0.txt', 'w', encoding='utf-8') as txt_0:
    txt_0.write('写入数据到TXT')
# Read the same file back and print its content.
with open('packages/txt_0.txt', 'r', encoding='utf-8') as txt_1:
    print(txt_1.read())

# Tabular text through pandas: no header row, keep only columns 0, 1 and 3.
# pandas is imported here because this snippet runs before any other import of it.
import pandas as pd
group = pd.read_table('bayes.txt', header=None, usecols=[0, 1, 3])


2  Excel 读写

# 1  Writing a workbook with xlsxwriter
import xlsxwriter

# Create the workbook with one worksheet, then put the numbers 0..100
# into the first column, one value per row starting at row 0.
workbook = xlsxwriter.Workbook('packages/写入的XLSX.xlsx')
worksheet = workbook.add_worksheet()
for row, value in enumerate(range(101)):
    worksheet.write(row, 0, value)
workbook.close()
print('写入成功')


print('\n\n pandas读取写入 转为DataFrame用pd写入')
import numpy as np
import pandas as pd

# Read: the workbook written by the xlsxwriter step has data starting in
# row 0 and no header row, so header=None stops pandas from consuming the
# first value as a column name.
excel_0 = pd.read_excel('packages/写入的XLSX.xlsx', header=None)

# Write: build a reproducible 10x10 array of normally distributed samples.
np.random.seed(20)
a = np.random.randn(10, 10)
print(a)
# Wrap the array in a DataFrame and write it to the same workbook path
# (this overwrites the file read above).
df = pd.DataFrame(a)
df.to_excel('packages/写入的XLSX.xlsx')

3 pandas 读写 csv

# Write: a small 2x3 integer table.
# np.mat is no longer available in the NumPy 2.0 namespace, so a plain
# ndarray with the same values is used instead.
a = np.array([[2, 3, 4], [2, 3, 4]])
df = pd.DataFrame(a)
df.to_csv('packages/csv_0.csv')
# Read: to_csv wrote the row index as the first column; index_col=0 restores
# it as the index instead of producing an extra 'Unnamed: 0' data column.
df = pd.read_csv('packages/csv_0.csv', index_col=0)
print(df)


# Read a header-less CSV, attach column names, and save it again.
group_0_1 = pd.read_csv('DATA/Fly_0_1.csv', header=None)
group_0_1.columns = ['Fly', 'Game', 'Table']
print(group_0_1)
# `usecols` is a read-side option; to_csv rejects it with a TypeError.
# The write-side equivalent is `columns`. index=False omits the row index.
group_0_1.to_csv('DATA/Fly_0_2.csv', index=False, columns=['Fly', 'Game', 'Table'])


4 HDF (注:没有运行成功!)

import tables
import numpy as np
from os.path import getsize, join
from tempfile import TemporaryDirectory

np.random.seed(42)
a = np.random.randn(365, 4)

# PyTables 3.x renamed the camelCase API (openFile, createArray, iterNodes)
# to snake_case; the old names are a likely reason the original snippet
# did not run.
# A fresh path inside a temporary directory is used instead of a
# NamedTemporaryFile, because reopening such a file by name while it is
# still held open is not portable (it fails on Windows).
with TemporaryDirectory() as tmpdir:
    h5path = join(tmpdir, 'array.h5')

    # Store the array under the root group; `with` closes the file on error too.
    with tables.open_file(h5path, mode='w', title='NumpPy Array') as h5file:
        h5file.create_array(h5file.root, 'array', a)

    print(getsize(h5path))

    # Reopen read-only and print type and shape of every node under the root.
    with tables.open_file(h5path, mode='r') as h5file:
        for node in h5file.iter_nodes(h5file.root):
            b = node.read()
            print(type(b), b.shape)


5 HDF5仓库 HDFStore(注:没有运行成功!)

import numpy as np
import pandas as pd
from os.path import join
from tempfile import TemporaryDirectory

np.random.seed(42)
a = np.random.randn(365, 4)
df = pd.DataFrame(a)

# The store is created at a fresh path inside a temporary directory.
# The original opened it on a NamedTemporaryFile, i.e. an empty file that is
# still held open by Python -- presumably why the snippet did not run
# (reopening by name is not portable; TODO confirm on the target platform).
with TemporaryDirectory() as tmpdir:
    h5path = join(tmpdir, 'store.h5')

    # pd.HDFStore is the public name for pd.io.pytables.HDFStore.
    store = pd.HDFStore(h5path)
    try:
        print( store)

        store['df'] = df
        print( store)

        # Three equivalent ways to fetch the stored frame.
        print( "Get", store.get('df').shape)
        print( "Lookup", store['df'].shape)
        print( "Dotted", store.df.shape)

        del store['df']
        print( "After del\n", store)

        print( "Before close", store.is_open)
    finally:
        # Always release the HDF5 handle, even if one of the steps above fails.
        store.close()
    print( "After close", store.is_open)

    # DataFrame-level API: 'table' format is required for `where` queries.
    df.to_hdf(h5path, key='data', format='table')
    print( pd.read_hdf(h5path, key='data', where=['index>363']))

6 pandas Json 读写

import pandas as pd
from io import StringIO

# Raw string: the JSON escape "\/" must reach the parser unchanged, and in a
# non-raw literal "\/" is an invalid Python escape sequence (warning).
json_str = r'{"country":"Netherlands","dma_code":"0","timezone":"Europe\/Amsterdam","area_code":"0","ip":"46.19.37.108","asn":"AS196752","continent_code":"EU","isp":"Tilaa V.O.F.","longitude":5.75,"latitude":52.5,"country_code":"NL","country_code3":"NLD"}'
# Read the JSON object into a Series. The text is wrapped in StringIO because
# passing literal JSON directly to read_json is deprecated in recent pandas.
data = pd.read_json(StringIO(json_str), typ='series')
print ("Series\n", data)
# Change one field and serialise the Series back to JSON.
data["country"] = "Brazil"
print ("New Series\n", data.to_json())


# Parse JSON returned by a web service.
import json
import requests

ip = '50.78.253.58'
# A timeout prevents the script from hanging forever on an unresponsive host.
# NOTE(review): confirm that this third-party endpoint is still in service.
html = requests.get('http://freegeoip.net/json/' + ip, timeout=10)
# Fail with a clear HTTP error instead of a confusing JSON decode error
# when the server answers with a non-2xx status.
html.raise_for_status()
respondjson = json.loads(html.text)
print(respondjson.get('country_code'))


7 cv2 图片 读写

import cv2
import numpy as np

# Read the image.
img = cv2.imread('packages/000.jpg')
# imread signals an unreadable/missing file by returning None, which would
# otherwise surface below as an obscure TypeError on the first slice.
if img is None:
    raise FileNotFoundError("cannot read image: 'packages/000.jpg'")

# Edit pixels in place.
img[50:150, 50:150] = 255               # set every channel of this square to 255
img[:, :, 2] = 255                      # set channel index 2 to 255 everywhere
img[300:600, 300:600] = (200, 200, 0)   # BGR
# Show the image and wait for a key press before closing the window.
cv2.imshow('2', img)
cv2.waitKey()
cv2.destroyAllWindows()
# Write the edited image; report a failed write instead of ignoring it.
if not cv2.imwrite('packages/Output.jpg', img):
    raise OSError("cannot write image: 'packages/Output.jpg'")



8 全部代码:

print('----------1 TXT读写-----------\n')
# Write: mode 'w' is write-only, 'r' is read-only.
# `with` closes the handle even if the write/read raises, and the explicit
# encoding keeps the non-ASCII text independent of the platform default.
with open('packages/txt_0.txt', 'w', encoding='utf-8') as txt_0:
    txt_0.write('写入数据到TXT')
# Read the same file back and print its content.
with open('packages/txt_0.txt', 'r', encoding='utf-8') as txt_1:
    print(txt_1.read())

# Tabular text through pandas: no header row, keep only columns 0, 1 and 3.
# pandas is imported here because this statement runs before any other import of it.
import pandas as pd
group = pd.read_table('bayes.txt', header=None, usecols=[0, 1, 3])



print('\n\n----------------2 Excel 读写-----------------------')
# 1  Writing a workbook with xlsxwriter
import xlsxwriter

# Create the workbook with one worksheet, then put the numbers 0..100
# into the first column, one value per row starting at row 0.
workbook = xlsxwriter.Workbook('packages/写入的XLSX.xlsx')
worksheet = workbook.add_worksheet()
for row, value in enumerate(range(101)):
    worksheet.write(row, 0, value)
workbook.close()
print('写入成功')


print('\n\n pandas读取写入 转为DataFrame用pd写入')
import numpy as np
import pandas as pd

# Read: the workbook written by the xlsxwriter step has data starting in
# row 0 and no header row, so header=None stops pandas from consuming the
# first value as a column name.
excel_0 = pd.read_excel('packages/写入的XLSX.xlsx', header=None)

# Write: build a reproducible 10x10 array of normally distributed samples.
np.random.seed(20)
a = np.random.randn(10, 10)
print(a)
# Wrap the array in a DataFrame and write it to the same workbook path
# (this overwrites the file read above).
df = pd.DataFrame(a)
df.to_excel('packages/写入的XLSX.xlsx')








print('\n\n----------3 pandas 读写 csv----------------')
# Write: a small 2x3 integer table.
# np.mat is no longer available in the NumPy 2.0 namespace, so a plain
# ndarray with the same values is used instead.
a = np.array([[2, 3, 4], [2, 3, 4]])
df = pd.DataFrame(a)
df.to_csv('packages/csv_0.csv')
# Read: to_csv wrote the row index as the first column; index_col=0 restores
# it as the index instead of producing an extra 'Unnamed: 0' data column.
df = pd.read_csv('packages/csv_0.csv', index_col=0)
print(df)


# Read a header-less CSV, attach column names, and save it again.
group_0_1 = pd.read_csv('DATA/Fly_0_1.csv', header=None)
group_0_1.columns = ['Fly', 'Game', 'Table']
print(group_0_1)
# `usecols` is a read-side option; to_csv rejects it with a TypeError.
# The write-side equivalent is `columns`. index=False omits the row index.
group_0_1.to_csv('DATA/Fly_0_2.csv', index=False, columns=['Fly', 'Game', 'Table'])









print('\n\n----------------4 HDF-运行失败----------------')

import tables
import numpy as np
from os.path import getsize, join
from tempfile import TemporaryDirectory

np.random.seed(42)
a = np.random.randn(365, 4)

# PyTables 3.x renamed the camelCase API (openFile, createArray, iterNodes)
# to snake_case; the old names are a likely reason the original snippet
# did not run.
# A fresh path inside a temporary directory is used instead of a
# NamedTemporaryFile, because reopening such a file by name while it is
# still held open is not portable (it fails on Windows).
with TemporaryDirectory() as tmpdir:
    h5path = join(tmpdir, 'array.h5')

    # Store the array under the root group; `with` closes the file on error too.
    with tables.open_file(h5path, mode='w', title='NumpPy Array') as h5file:
        h5file.create_array(h5file.root, 'array', a)

    print(getsize(h5path))

    # Reopen read-only and print type and shape of every node under the root.
    with tables.open_file(h5path, mode='r') as h5file:
        for node in h5file.iter_nodes(h5file.root):
            b = node.read()
            print(type(b), b.shape)




print('\n\n------------------5 HDF5仓库 HDFStore-运行失败---------------------')
import numpy as np
import pandas as pd
from os.path import join
from tempfile import TemporaryDirectory

np.random.seed(42)
a = np.random.randn(365, 4)
df = pd.DataFrame(a)

# The store is created at a fresh path inside a temporary directory.
# The original opened it on a NamedTemporaryFile, i.e. an empty file that is
# still held open by Python -- presumably why the snippet did not run
# (reopening by name is not portable; TODO confirm on the target platform).
with TemporaryDirectory() as tmpdir:
    h5path = join(tmpdir, 'store.h5')

    # pd.HDFStore is the public name for pd.io.pytables.HDFStore.
    store = pd.HDFStore(h5path)
    try:
        print( store)

        store['df'] = df
        print( store)

        # Three equivalent ways to fetch the stored frame.
        print( "Get", store.get('df').shape)
        print( "Lookup", store['df'].shape)
        print( "Dotted", store.df.shape)

        del store['df']
        print( "After del\n", store)

        print( "Before close", store.is_open)
    finally:
        # Always release the HDF5 handle, even if one of the steps above fails.
        store.close()
    print( "After close", store.is_open)

    # DataFrame-level API: 'table' format is required for `where` queries.
    df.to_hdf(h5path, key='data', format='table')
    print( pd.read_hdf(h5path, key='data', where=['index>363']))





print('\n\n------------pandas Json---------------------')
import pandas as pd
from io import StringIO

# Raw string: the JSON escape "\/" must reach the parser unchanged, and in a
# non-raw literal "\/" is an invalid Python escape sequence (warning).
json_str = r'{"country":"Netherlands","dma_code":"0","timezone":"Europe\/Amsterdam","area_code":"0","ip":"46.19.37.108","asn":"AS196752","continent_code":"EU","isp":"Tilaa V.O.F.","longitude":5.75,"latitude":52.5,"country_code":"NL","country_code3":"NLD"}'
# Read the JSON object into a Series. The text is wrapped in StringIO because
# passing literal JSON directly to read_json is deprecated in recent pandas.
data = pd.read_json(StringIO(json_str), typ='series')
print ("Series\n", data)
# Change one field and serialise the Series back to JSON.
data["country"] = "Brazil"
print ("New Series\n", data.to_json())


# Parse JSON returned by a web service.
import json
import requests

ip = '50.78.253.58'
# A timeout prevents the script from hanging forever on an unresponsive host.
# NOTE(review): confirm that this third-party endpoint is still in service.
html = requests.get('http://freegeoip.net/json/' + ip, timeout=10)
# Fail with a clear HTTP error instead of a confusing JSON decode error
# when the server answers with a non-2xx status.
html.raise_for_status()
respondjson = json.loads(html.text)
print(respondjson.get('country_code'))









print('\n\n----------------------cv2 图片--------------------------')
import cv2
import numpy as np

# Read the image.
img = cv2.imread('packages/000.jpg')
# imread signals an unreadable/missing file by returning None, which would
# otherwise surface below as an obscure TypeError on the first slice.
if img is None:
    raise FileNotFoundError("cannot read image: 'packages/000.jpg'")

# Edit pixels in place.
img[50:150, 50:150] = 255               # set every channel of this square to 255
img[:, :, 2] = 255                      # set channel index 2 to 255 everywhere
img[300:600, 300:600] = (200, 200, 0)   # BGR
# Show the image and wait for a key press before closing the window.
cv2.imshow('2', img)
cv2.waitKey()
cv2.destroyAllWindows()
# Write the edited image; report a failed write instead of ignoring it.
if not cv2.imwrite('packages/Output.jpg', img):
    raise OSError("cannot write image: 'packages/Output.jpg'")



9 参数(pandas DataFrame.to_csv):

参数
Parameters:	

path_or_buf : 
string or file handle, default None
File path or object, if None is provided the result is returned as a string.

sep : 
character, default ‘,’
Field delimiter for the output file.

na_rep : 
string, default ‘’
Missing data representation

float_format : 
string, default None
Format string for floating point numbers

columns : 
sequence, optional
Columns to write

header : 
boolean or list of string, default True
Write out column names. If a list of string is given it is assumed to be aliases for the column names

index : 
boolean, default True
Write row names (index)

index_label : 
string or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and header and index are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. If False do not print fields for index names. Use index_label=False for easier importing in R

mode : 
str
Python write mode, default ‘w’

encoding : 
string, optional
A string representing the encoding to use in the output file, defaults to ‘ascii’ on Python 2 and ‘utf-8’ on Python 3.

compression : 
string, optional
a string representing the compression to use in the output file, allowed values are ‘gzip’, ‘bz2’, ‘xz’, only used when the first argument is a filename

line_terminator : 
string, default '\n'
The newline character or character sequence to use in the output file

quoting : 
optional constant from csv module
defaults to csv.QUOTE_MINIMAL. If you have set a float_format then floats are converted to strings and thus csv.QUOTE_NONNUMERIC will treat them as non-numeric

quotechar : 
string (length 1), default ‘"’
character used to quote fields

doublequote : 
boolean, default True
Control quoting of quotechar inside a field

escapechar : 
string (length 1), default None
character used to escape sep and quotechar when appropriate

chunksize : 
int or None
rows to write at a time

tupleize_cols : 
boolean, default False
write multi_index columns as a list of tuples (if True) or new (expanded format) if False

date_format :
 string, default None
Format string for datetime objects

decimal: 
string, default ‘.’
Character recognized as decimal separator. E.g. use ‘,’ for European data
New in version 0.16.0.



你可能感兴趣的:(数据分析,python)