pandas4:读写多种格式文件pickle,csv,excel,json,html,sql...

import pandas as pd

# Load the sample table and keep its 'Name' column as a standalone Series.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']

# Show the whole DataFrame first, then the Series, one after the other.
print(scientists, names, sep='\n')
                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician
0       Rosaline Franklin
1          William Gosset
2    Florence Nightingale
3             Marie Curie
4           Rachel Carson
5               John Snow
6             Alan Turing
7            Johann Gauss
Name: Name, dtype: object

1.读写pickle格式

import os

# Make sure the target directory exists: to_pickle() does not create missing
# parent directories and would otherwise fail the first time this is run.
os.makedirs('./output', exist_ok=True)

# Write the Series pickle only if it is not already on disk.
path_name = './output/scientist_name_Series.pickle'
if not os.path.exists(path_name):
    names.to_pickle(path_name)

# Same write-once guard for the full DataFrame.
path_all = './output/scientist_DataFrame.pickle'
if not os.path.exists(path_all):
    scientists.to_pickle(path_all)

# Read the Series back from its pickle file.
# NOTE: only unpickle files you created yourself; loading a pickle can
# execute arbitrary code.
scientists_names_frme_pickle = pd.read_pickle(path_name)
print(scientists_names_frme_pickle)
0       Rosaline Franklin
1          William Gosset
2    Florence Nightingale
3             Marie Curie
4           Rachel Carson
5               John Snow
6             Alan Turing
7            Johann Gauss
Name: Name, dtype: object
# Path of the DataFrame pickle; it is written only when it does not exist yet.
path_all = './output/scientist_DataFrame.pickle'
if not os.path.exists(path_all):
    scientists.to_pickle(path_all)
# Load the whole DataFrame back from the pickle and display it.
scientists_frme_pickle = pd.read_pickle(path_all)
print(scientists_frme_pickle)
                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

2.读写CSV格式

2.1 读写csv格式的Series

import pandas as pd

# Re-load the source table at the start of the CSV section and pull out the
# 'Name' column as a Series.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']

# Display the DataFrame followed by the Series.
print(scientists, names, sep='\n')
                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician
0       Rosaline Franklin
1          William Gosset
2    Florence Nightingale
3             Marie Curie
4           Rachel Carson
5               John Snow
6             Alan Turing
7            Johann Gauss
Name: Name, dtype: object
import os
# Save the Series with default options; the file is written only once.
path_name = './output/scientist_name_Series.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name)
# The index was written as a header-less first column, so reading the file
# back yields an extra 'Unnamed: 0' column next to 'Name'.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
   Unnamed: 0                  Name
0           0     Rosaline Franklin
1           1        William Gosset
2           2  Florence Nightingale
3           3           Marie Curie
4           4         Rachel Carson
5           5             John Snow
6           6           Alan Turing
7           7          Johann Gauss
# Save the Series again, this time using '*' as the field separator.
path_name = './output/scientist_name_Series1.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name,sep='*')
# NOTE: read_csv() is called without sep='*', so every line is parsed as one
# single column (header '*Name'). Pass sep='*' to split the fields again.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
                    *Name
0     0*Rosaline Franklin
1        1*William Gosset
2  2*Florence Nightingale
3           3*Marie Curie
4         4*Rachel Carson
5             5*John Snow
6           6*Alan Turing
7          7*Johann Gauss
# Save the Series without its index so only the 'Name' column is stored.
path_name = './output/scientist_name_Series2.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name,index=False)
# Reading it back gives a one-column table with no 'Unnamed: 0' column.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
                   Name
0     Rosaline Franklin
1        William Gosset
2  Florence Nightingale
3           Marie Curie
4         Rachel Carson
5             John Snow
6           Alan Turing
7          Johann Gauss

2.2 读写csv格式的DataFrame

# Save the full DataFrame with default options (index included).
path_name = './output/scientist_DataFrame.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name)
# The written index comes back as an extra 'Unnamed: 0' column.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
   Unnamed: 0                  Name        Born        Died  Age  \
0           0     Rosaline Franklin  1920-07-25  1958-04-16   37   
1           1        William Gosset  1876-06-13  1937-10-16   61   
2           2  Florence Nightingale  1820-05-12  1910-08-13   90   
3           3           Marie Curie  1867-11-07  1934-07-04   66   
4           4         Rachel Carson  1907-05-27  1964-04-14   56   
5           5             John Snow  1813-03-15  1858-06-16   45   
6           6           Alan Turing  1912-06-23  1954-06-07   41   
7           7          Johann Gauss  1777-04-30  1855-02-23   77   

           Occupation  
0             Chemist  
1        Statistician  
2               Nurse  
3             Chemist  
4           Biologist  
5           Physician  
6  Computer Scientist  
7       Mathematician  
# Save the DataFrame using '*' as the field separator.
path_name = './output/scientist_DataFrame2.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name,sep='*')
# NOTE: read_csv() is called without sep='*', so each row is read as one long
# string in a single column. Pass sep='*' to recover the original columns.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
                      *Name*Born*Died*Age*Occupation
0  0*Rosaline Franklin*1920-07-25*1958-04-16*37*C...
1  1*William Gosset*1876-06-13*1937-10-16*61*Stat...
2  2*Florence Nightingale*1820-05-12*1910-08-13*9...
3     3*Marie Curie*1867-11-07*1934-07-04*66*Chemist
4  4*Rachel Carson*1907-05-27*1964-04-14*56*Biolo...
5     5*John Snow*1813-03-15*1858-06-16*45*Physician
6  6*Alan Turing*1912-06-23*1954-06-07*41*Compute...
7  7*Johann Gauss*1777-04-30*1855-02-23*77*Mathem...
# Save the DataFrame without the index column.
path_name = './output/scientist_DataFrame3.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name,index=False)
# Reading it back reproduces the original five columns.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

3.读写excel文件

3.1 读写excel文件的Series和DataFrame

import pandas as pd
# Re-load the source table at the start of the Excel section and keep the
# 'Name' column as a Series for the examples that follow.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']
print(scientists)
#print(names)
                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician
# Write the Series directly, and also after converting it to a one-column
# DataFrame.
# NOTE(review): writing legacy '.xls' files depends on an engine such as xlwt,
# which recent pandas releases no longer support - confirm the pandas version.
names.to_excel('./output/scientist_name_series.xls')
names_df = names.to_frame()
names_df.to_excel('./output/scientist_name_df.xls')
# Write the full table without the index: once with the default sheet name
# and once on a sheet explicitly named 'scientist'.
scientists.to_excel('./output/scientist_df1.xls',index=False)
scientists.to_excel('./output/scientist_df2.xls',sheet_name='scientist',index=False)
# Read the Series file back; the index that was written shows up as an
# 'Unnamed: 0' column.
scientists_name_frme_excel= pd.read_excel('./output/scientist_name_series.xls')
print(scientists_name_frme_excel)
   Unnamed: 0                  Name
0           0     Rosaline Franklin
1           1        William Gosset
2           2  Florence Nightingale
3           3           Marie Curie
4           4         Rachel Carson
5           5             John Snow
6           6           Alan Turing
7           7          Johann Gauss
# Read the file written from the one-column DataFrame; the result matches the
# Series file, including the 'Unnamed: 0' index column.
scientists_name_df_frme_excel= pd.read_excel('./output/scientist_name_df.xls')
print(scientists_name_df_frme_excel)
   Unnamed: 0                  Name
0           0     Rosaline Franklin
1           1        William Gosset
2           2  Florence Nightingale
3           3           Marie Curie
4           4         Rachel Carson
5           5             John Snow
6           6           Alan Turing
7           7          Johann Gauss
# Read the full table that was written with index=False (default sheet name).
scientists_frme_excel= pd.read_excel('./output/scientist_df1.xls')
print(scientists_frme_excel)
                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician
# Read the file whose sheet was named 'scientist'; no sheet name is passed
# here and the same table comes back.
scientists_frme_excel= pd.read_excel('./output/scientist_df2.xls')
print(scientists_frme_excel)
                   Name        Born        Died  Age          Occupation
0     Rosaline Franklin  1920-07-25  1958-04-16   37             Chemist
1        William Gosset  1876-06-13  1937-10-16   61        Statistician
2  Florence Nightingale  1820-05-12  1910-08-13   90               Nurse
3           Marie Curie  1867-11-07  1934-07-04   66             Chemist
4         Rachel Carson  1907-05-27  1964-04-14   56           Biologist
5             John Snow  1813-03-15  1858-06-16   45           Physician
6           Alan Turing  1912-06-23  1954-06-07   41  Computer Scientist
7          Johann Gauss  1777-04-30  1855-02-23   77       Mathematician

3.2 使用xlrd读取excel文件

import xlrd
# Open the workbook with xlrd and select a worksheet by its name.
data = xlrd.open_workbook('./output/scientist_df2.xls')
sheet = data.sheet_by_name('scientist')
# Alternative: select the worksheet by position instead of by name.
#sheet = data.sheet_by_index(0)
# Row 1 is the first data row (row 0 holds the header); column 1 is 'Born',
# header cell included.
print(sheet.row_values(1))
print(sheet.col_values(1))
['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']
# Sheet dimensions: the row count includes the header row (9 rows here for
# 8 records), and there are 5 columns.
print('行数=',sheet.nrows)
print('列数=',sheet.ncols)
# Print the sheet object itself.
print(sheet)
行数= 9
列数= 5

# Access single cells by (row, column): (0, 0) is the header cell 'Name';
# (2, 3) is the 'Age' of the second record, returned as a float (61.0).
print(sheet.cell(0,0).value)
print(sheet.cell(2,3).value)
Name
61.0
# List the names of all worksheets in the workbook.
print(data.sheet_names())
['scientist']
# Show the selected sheet's name, then its first data row and its 'Born'
# column again.
print(sheet.name)
print(sheet.row_values(1))
print(sheet.col_values(1))
scientist
['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']

4.其他格式 json,html,sql

import pandas as pd
# Re-load the source table for the remaining export formats.
scientists = pd.read_csv('./data/scientists.csv')
#scientists.to_clipboard()
#print(scientists.to_dict())
# to_html()/to_json() are given a target path here, so they write the file and
# the value that gets printed is None.
print(scientists.to_html('./t.html'))
print(scientists.to_json('./t.json'))
None
None
import sqlite3
import sqlalchemy

# Create (or open) a local SQLite database file through SQLAlchemy.
engine = sqlalchemy.create_engine('sqlite:///my_db.sqlite')
# to_sql() defaults to if_exists='fail', which raises ValueError as soon as
# this is run a second time against the same database file. Replace the
# table instead so the example can be re-run safely.
scientists.to_sql('scientists', engine, if_exists='replace')

你可能感兴趣的:(#,pandas)