pandas学习笔记

pandas学习笔记

1.安装

brew install python3
wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py
pip3 install pandas

2.学习代码-Series

  • code 1
from pandas import Series, DataFrame
import pandas as pd

s = Series([100, 'python', 'soochow', 'qiwsir'])
print(s)

0 100
1 python
2 soochow
3 qiwsir
dtype: object

print(s.values)

[100 'python' 'soochow' 'qiwsir']

print(s.index)

RangeIndex(start=0, stop=4, step=1)

s2 = Series([100, 'python', 'soochow', 'qiwsir'], index=['mark', 'title', 'university', 'name'])
print(s2)

mark 100
title python
university soochow
name qiwsir
dtype: object

print(s2.index)

Index(['mark', 'title', 'university', 'name'], dtype='object')

print(s2['name'])

qiwsir

s2['name'] = 'aoi'
print(s2)

mark 100
title python
university soochow
name aoi
dtype: object

sd = {
  'python': 8000, 
  'c': 8100,
  'php': 4000
}
s4 = Series(sd)
print(s4)

c 8100
php 4000
python 8000
dtype: int64

s6 = Series(sd, index=['java', 'python', 'c', 'php'])
print(s6)

java NaN
python 8000.0
c 8100.0
php 4000.0
dtype: float64

alist = ['java', 'perl']
s5 = Series(sd, index=alist)
print(s5)

java NaN
perl NaN
dtype: float64

print(pd.isnull(s6))

java True
python False
c False
php False
dtype: bool

print(pd.notnull(s6))

java False
python True
c True
php True
dtype: bool

print(s6.isnull())

java True
python False
c False
php False
dtype: bool

s6.index = ['p1', 'p2', 'p3', 'p4']
print(s6)

p1 NaN
p2 8000.0
p3 8100.0
p4 4000.0
dtype: float64

s3 = Series([3, 9, 4, 7], index = ['a', 'b', 'c', 'd'])
print(s3)

a 3
b 9
c 4
d 7
dtype: int64

print(s3[s3 > 5])

b 9
d 7
dtype: int64

print(s3 * 5)

a 15
b 45
c 20
d 35
dtype: int64

print (s5 + s6)

java NaN
p1 NaN
p2 NaN
p3 NaN
p4 NaN
perl NaN
dtype: float64

3.学习代码-DataFrame

import pandas as pd
from pandas import Series, DataFrame

data = {
  'name': ['yahoo', 'google', 'facebook'],
  'marks': [200, 400, 800],
  'price': [9, 3, 7]
}
f1 = DataFrame(data)
print (f1)

marks name price
0 200 yahoo 9
1 400 google 3
2 800 facebook 7

f2 = DataFrame(data, columns=['name', 'price', 'marks'])
print (f2)

name price marks
0 yahoo 9 200
1 google 3 400
2 facebook 7 800

f3 = DataFrame(data, columns=['name', 'price', 'marks', 'debt'], index=['a', 'b', 'c'])
print (f3)

name price marks debt
a yahoo 9 200 NaN
b google 3 400 NaN
c facebook 7 800 NaN

new_data = {
  'lang': {
    'firstline': 'python',
    'secondline': 'java'
  },
  'price': {
    'firstline': 8000
  }
}
f4 = DataFrame(new_data)
print (f4)

lang price
firstline python 8000.0
secondline java NaN

f4 = DataFrame(new_data, index=['firstline', 'secondline', 'thirdline'])
print (f4)

lang price
firstline python 8000.0
secondline java NaN
thirdline NaN NaN

print (f3.columns)

Index(['name', 'price', 'marks', 'debt'], dtype='object')

print (f3['name'])

a yahoo
b google
c facebook
Name: name, dtype: object

f3['debt'] = 80.1
print (f3)

name price marks debt
a yahoo 9 200 80.1
b google 3 400 80.1
c facebook 7 800 80.1

sdebt = Series([2.2, 3.3], index=['a', 'c'])
f3['debt'] = sdebt
print (f3)

name price marks debt
a yahoo 9 200 2.2
b google 3 400 NaN
c facebook 7 800 3.3

f3['price']['c'] = 300  # 注意：这是链式赋值，新版 pandas 中可能触发 SettingWithCopyWarning 或不生效，推荐写法：f3.loc['c', 'price'] = 300
print (f3)

name price marks debt
a yahoo 9 200 2.2
b google 3 400 NaN
c facebook 300 800 3.3

4.pandas使用

  • data
name,physics,python,math,english
Google,100,100,25,12
Facebook,45,54,44,88
Twitter,54,76,13,91
Yahoo,54,452,26,100

  • code
import pandas as pd

# 1
marks = pd.read_csv('marks.csv')
print (marks)

# 2
marks2 = pd.read_table('marks.csv', sep=',')
print (marks2)

name physics python math english
0 Google 100 100 25 12
1 Facebook 45 54 44 88
2 Twitter 54 76 13 91
3 Yahoo 54 452 26 100

print (marks.index)

RangeIndex(start=0, stop=4, step=1)

print (marks.columns)

Index(['name', 'physics', 'python', 'math', 'english'], dtype='object')

print (marks['name'][1])

Facebook

print (marks[:1])

name physics python math english
0 Google 100 100 25 12

print (marks[1:2])

name physics python math english
1 Facebook 45 54 44 88

print (marks['physics'])

0 100
1 45
2 54
3 54
Name: physics, dtype: int64

你可能感兴趣的:(pandas学习笔记)