[原创] Python 利用 pandas 读取 CSV 并导入 MongoDB 数据库 [数据处理]

Python
# coding:utf-8
"""Load selected columns of User.csv into the MongoDB collection weibo.repacleartuser."""

from time import time

import pandas as pd
import pymongo
from odo import odo

# Connect to the MongoDB server on localhost:27017.
Client = pymongo.MongoClient(host='localhost', port=27017)

# Load the CSV, skipping (with a warning) any malformed rows.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so try
# the current `on_bad_lines` spelling first and fall back for older pandas.
try:
    df = pd.read_csv('User.csv', on_bad_lines='warn')
except TypeError:
    # pandas < 1.3 rejects the unknown keyword with TypeError.
    df = pd.read_csv('User.csv', error_bad_lines=False)

# Target collection: database `weibo`, collection `repacleartuser`.
db = Client.weibo.repacleartuser

# Upper-case every column label (e.g. 'id' -> 'ID').
df.columns = df.columns.str.upper()

# Rename two columns to shorter labels.
df.rename(
    columns={'邮箱是否激活': '邮箱激活', '手机是否激活': '手机激活'},
    inplace=True,
)

# The same rename pattern handles other labels, for example:
#     df.rename(columns={'$b': 'B'}, inplace=True)
# Reference: http://www.cnblogs.com/hhh5460/p/5816774.html

# Example of iterating over the rows and checking a value's type:
#     for index, row in df.iterrows():
#         print(row['ID'])
#         if isinstance(row['ID'], int):   # is row['ID'] an int?
#             print(row)

# Keep only the columns that should be stored, in this order.
newdf = df[[
    'ID', '用户名', '邮箱', '手机号', '上次登录', '登录次数',
    '注册来源', '注册IP', '邮箱激活', '手机激活', '账户状态', '系统时间',
]]

try:
    b = time()
    # One document per row; the conversion is part of the timed section,
    # as it was in the original single-expression call.
    records = newdf.to_dict('records')
    # insert_many() raises on an empty document list, so skip the call when
    # the CSV produced no rows.
    if records:
        db.insert_many(records)
    # Original author's note: 2 million rows took 96.56582021713257 seconds.
    e = time()
    print(e - b)
finally:
    # Release the connection even if the insert fails.
    Client.close()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# coding:utf-8
 
import pandas as pd
import pymongo
from odo import odo
from time import time

# Connect to the MongoDB server on localhost:27017.
Client = pymongo.MongoClient(host='localhost', port=27017)

# Load the CSV, skipping (with a warning) any malformed rows.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so try
# the current `on_bad_lines` spelling first and fall back for older pandas.
try:
    df = pd.read_csv('User.csv', on_bad_lines='warn')
except TypeError:
    # pandas < 1.3 rejects the unknown keyword with TypeError.
    df = pd.read_csv('User.csv', error_bad_lines=False)

# Target collection: database `weibo`, collection `repacleartuser`.
db = Client.weibo.repacleartuser

# Upper-case every column label (e.g. 'id' -> 'ID').
df.columns = df.columns.str.upper()

# Rename two columns to shorter labels.
df.rename(
    columns={'邮箱是否激活': '邮箱激活', '手机是否激活': '手机激活'},
    inplace=True,
)

# The same rename pattern handles other labels, for example:
#     df.rename(columns={'$b': 'B'}, inplace=True)
# Reference: http://www.cnblogs.com/hhh5460/p/5816774.html

# Example of iterating over the rows and checking a value's type:
#     for index, row in df.iterrows():
#         print(row['ID'])
#         if isinstance(row['ID'], int):   # is row['ID'] an int?
#             print(row)

# Keep only the columns that should be stored, in this order.
newdf = df[[
    'ID', '用户名', '邮箱', '手机号', '上次登录', '登录次数',
    '注册来源', '注册IP', '邮箱激活', '手机激活', '账户状态', '系统时间',
]]

try:
    b = time()
    # One document per row; the conversion is part of the timed section,
    # as it was in the original single-expression call.
    records = newdf.to_dict('records')
    # insert_many() raises on an empty document list, so skip the call when
    # the CSV produced no rows.
    if records:
        db.insert_many(records)
    # Original author's note: 2 million rows took 96.56582021713257 seconds.
    e = time()
    print(e - b)
finally:
    # Release the connection even if the insert fails.
    Client.close()



你可能感兴趣的:([原创] Python 利用 pandas 读取 CSV 并导入 MongoDB 数据库 [数据处理])