import pandas as pd
# Load the sample table and pull out its 'Name' column as a Series.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']

# Show the full DataFrame first, then the single-column Series.
for loaded in (scientists, names):
    print(loaded)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
Name: Name, dtype: object
1. 读写pickle格式
import os
# Pickle targets: one file for the 'Name' Series, one for the whole DataFrame.
path_name = './output/scientist_name_Series.pickle'
path_all = './output/scientist_DataFrame.pickle'

# Write each pickle only when its file is not already on disk.
for target, obj in ((path_name, names), (path_all, scientists)):
    if not os.path.exists(target):
        obj.to_pickle(target)

# Read the Series pickle back and display it.
scientists_names_frme_pickle = pd.read_pickle(path_name)
print(scientists_names_frme_pickle)
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
Name: Name, dtype: object
# Round-trip the whole DataFrame through a pickle file.
path_all = './output/scientist_DataFrame.pickle'
already_pickled = os.path.exists(path_all)
if not already_pickled:
    # First run: create the pickle; later runs reuse the file on disk.
    scientists.to_pickle(path_all)

scientists_frme_pickle = pd.read_pickle(path_all)
print(scientists_frme_pickle)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
2.读写CSV格式
2.1 读写csv格式的Series
import pandas as pd
# Reload the source table so this section can run on its own.
scientists = pd.read_csv('./data/scientists.csv')
names = scientists['Name']

# One print call, newline-separated: the DataFrame, then the Series.
print(scientists, names, sep='\n')
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
Name: Name, dtype: object
import os
# Save the 'Name' Series as CSV with default options (index included).
path_name = './output/scientist_name_Series.CSV'
csv_missing = not os.path.exists(path_name)
if csv_missing:
    names.to_csv(path_name)

# The saved index has no header, so it is read back as 'Unnamed: 0'.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
Unnamed: 0 Name
0 0 Rosaline Franklin
1 1 William Gosset
2 2 Florence Nightingale
3 3 Marie Curie
4 4 Rachel Carson
5 5 John Snow
6 6 Alan Turing
7 7 Johann Gauss
# Save the 'Name' Series using '*' as the field separator instead of ','.
path_name = './output/scientist_name_Series1.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name, sep='*')

# Read with the same separator that was used for writing; with the default
# ',' every line would be parsed as one single column named '*Name'.
scientists_name_frme_CSV = pd.read_csv(path_name, sep='*')
print(scientists_name_frme_CSV)
# Expected output:
#    Unnamed: 0                  Name
# 0           0     Rosaline Franklin
# 1           1        William Gosset
# 2           2  Florence Nightingale
# 3           3           Marie Curie
# 4           4         Rachel Carson
# 5           5             John Snow
# 6           6           Alan Turing
# 7           7          Johann Gauss
# Save the 'Name' Series without its index column.
path_name = './output/scientist_name_Series2.CSV'
if not os.path.exists(path_name):
    names.to_csv(path_name, index=False)

# With no index stored, only the 'Name' column comes back.
scientists_name_frme_CSV = pd.read_csv(path_name)
print(scientists_name_frme_CSV)
Name
0 Rosaline Franklin
1 William Gosset
2 Florence Nightingale
3 Marie Curie
4 Rachel Carson
5 John Snow
6 Alan Turing
7 Johann Gauss
2.2 读写csv格式的DataFrame
# Save the full DataFrame as CSV with default options (index included).
path_name = './output/scientist_DataFrame.CSV'
needs_export = not os.path.exists(path_name)
if needs_export:
    scientists.to_csv(path_name)

# The stored index shows up as an extra 'Unnamed: 0' column on reload.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
Unnamed: 0 Name Born Died Age \
0 0 Rosaline Franklin 1920-07-25 1958-04-16 37
1 1 William Gosset 1876-06-13 1937-10-16 61
2 2 Florence Nightingale 1820-05-12 1910-08-13 90
3 3 Marie Curie 1867-11-07 1934-07-04 66
4 4 Rachel Carson 1907-05-27 1964-04-14 56
5 5 John Snow 1813-03-15 1858-06-16 45
6 6 Alan Turing 1912-06-23 1954-06-07 41
7 7 Johann Gauss 1777-04-30 1855-02-23 77
Occupation
0 Chemist
1 Statistician
2 Nurse
3 Chemist
4 Biologist
5 Physician
6 Computer Scientist
7 Mathematician
# Save the full DataFrame using '*' as the field separator instead of ','.
path_name = './output/scientist_DataFrame2.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name, sep='*')

# Read with the same separator that was used for writing; with the default
# ',' each row would collapse into a single '*'-joined string column.
scientists_frme_CSV = pd.read_csv(path_name, sep='*')
print(scientists_frme_CSV)
# Expected: eight rows with the columns
#   Unnamed: 0, Name, Born, Died, Age, Occupation
# i.e. the same frame the comma-separated round trip produces.
# Save the full DataFrame without its index column.
path_name = './output/scientist_DataFrame3.CSV'
if not os.path.exists(path_name):
    scientists.to_csv(path_name, index=False)

# Reloading now yields exactly the original five columns.
scientists_frme_CSV = pd.read_csv(path_name)
print(scientists_frme_CSV)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
3.读写excel文件
3.1读写excel文件的Series
import pandas as pd
# Reload the source table for the Excel section.
csv_source = './data/scientists.csv'
scientists = pd.read_csv(csv_source)
names = scientists['Name']
print(scientists)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
# NOTE(review): all workbooks here use the legacy '.xls' extension; whether
# the installed pandas can still write that format should be confirmed.

# 1) The Series written directly (index included).
series_xls = './output/scientist_name_series.xls'
names.to_excel(series_xls)

# 2) The same data after converting the Series to a one-column DataFrame.
names_df = names.to_frame()
names_df.to_excel('./output/scientist_name_df.xls')

# 3) The full DataFrame without the index, first on the default sheet,
#    then on a sheet explicitly named 'scientist'.
scientists.to_excel('./output/scientist_df1.xls', index=False)
scientists.to_excel(
    './output/scientist_df2.xls',
    sheet_name='scientist',
    index=False,
)

# Read back the workbook produced from the Series.
scientists_name_frme_excel = pd.read_excel(series_xls)
print(scientists_name_frme_excel)
Unnamed: 0 Name
0 0 Rosaline Franklin
1 1 William Gosset
2 2 Florence Nightingale
3 3 Marie Curie
4 4 Rachel Carson
5 5 John Snow
6 6 Alan Turing
7 7 Johann Gauss
# Read back the workbook written from the one-column DataFrame.
name_df_xls = './output/scientist_name_df.xls'
scientists_name_df_frme_excel = pd.read_excel(name_df_xls)
print(scientists_name_df_frme_excel)
Unnamed: 0 Name
0 0 Rosaline Franklin
1 1 William Gosset
2 2 Florence Nightingale
3 3 Marie Curie
4 4 Rachel Carson
5 5 John Snow
6 6 Alan Turing
7 7 Johann Gauss
# Read back the full table that was saved without an index column.
scientists_frme_excel = pd.read_excel(
    './output/scientist_df1.xls',
)
print(scientists_frme_excel)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
# Read back the workbook whose sheet was written as 'scientist'; no sheet is
# named here, so pandas falls back to its default sheet selection.
df2_xls = './output/scientist_df2.xls'
scientists_frme_excel = pd.read_excel(df2_xls)
print(scientists_frme_excel)
Name Born Died Age Occupation
0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist
1 William Gosset 1876-06-13 1937-10-16 61 Statistician
2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse
3 Marie Curie 1867-11-07 1934-07-04 66 Chemist
4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist
5 John Snow 1813-03-15 1858-06-16 45 Physician
6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist
7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician
3.2 使用xlrd读取excel文件
import xlrd
# Open the workbook with xlrd and select the sheet by its name.
workbook_path = './output/scientist_df2.xls'
data = xlrd.open_workbook(workbook_path)
sheet = data.sheet_by_name('scientist')

# Row 0 holds the headers, so row 1 is the first data record;
# column 1 is the 'Born' column including its header cell.
first_record = sheet.row_values(1)
born_column = sheet.col_values(1)
print(first_record)
print(born_column)
['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']
# Report the sheet dimensions (the header row is counted in nrows),
# then print the sheet object itself.
row_count, col_count = sheet.nrows, sheet.ncols
print('行数=', row_count)
print('列数=', col_count)
print(sheet)
行数= 9
列数= 5
# Look up individual cells by (row, column), both zero-based.
header_cell = sheet.cell(0, 0)   # top-left header cell
age_cell = sheet.cell(2, 3)      # second data row, fourth column
print(header_cell.value)
print(age_cell.value)
Name
61.0
# List the names of every sheet in the workbook.
available_sheets = data.sheet_names()
print(available_sheets)
['scientist']
# Print the sheet's name, its first data row, and its second column
# (header included), one item per line.
sheet_summary = (sheet.name, sheet.row_values(1), sheet.col_values(1))
for item in sheet_summary:
    print(item)
scientist
['Rosaline Franklin', '1920-07-25', '1958-04-16', 37.0, 'Chemist']
['Born', '1920-07-25', '1876-06-13', '1820-05-12', '1867-11-07', '1907-05-27', '1813-03-15', '1912-06-23', '1777-04-30']
4.其他格式 json,html,sql
import pandas as pd
# Reload the source table for the export examples.
scientists = pd.read_csv('./data/scientists.csv')

# When a target path is given, to_html() and to_json() write the file and
# return None, so there is nothing useful to print; call them for their
# side effect only.
scientists.to_html('./t.html')
scientists.to_json('./t.json')
import sqlite3
import sqlalchemy
# Store the table in a local SQLite database file through SQLAlchemy.
engine = sqlalchemy.create_engine('sqlite:///my_db.sqlite')

# if_exists='replace' keeps this cell re-runnable: the default ('fail')
# raises ValueError as soon as the 'scientists' table already exists.
scientists.to_sql('scientists', engine, if_exists='replace')