MySQL 数据库导入数据
1. pandas
效率超高,但对内存要求高,且依赖网络稳定性
# Read the ratings file. Fields are separated by the multi-character token
# '::', and the file has no header row, so column names are supplied here.
ratings_names = ['user_id', 'movie_id', 'ratings', 'rating_time']
ratings = pd.read_table(
    '/home/qjun/桌面/movielens/ratings.dat',
    names=ratings_names,
    header=None,
    sep='::',
    engine='python',
)
# Store to SQL: append the rows to the `ratings` table through `db`, without
# writing the DataFrame index as an extra column.
ratings.to_sql(name='ratings', con=db, if_exists='append', index=False)
2. pymysql
import pymysql
class DB:
    """Small wrapper around a single pymysql connection.

    The connection is opened when the object is constructed and kept on
    ``self.con``. Call :meth:`close` (or use the object in a ``with``
    statement) to release it when the import is finished.
    """

    def __init__(self, host='localhost', port=3306, database='movielens',
                 user='root', password='123456', charset='utf8'):
        """Remember the connection settings and connect immediately.

        All parameters are forwarded unchanged to ``pymysql.connect``; the
        defaults reproduce the previously hard-coded local configuration, so
        ``DB()`` behaves as before.
        """
        self._settings = {
            'host': host,
            'port': port,
            'database': database,
            'charset': charset,
            'user': user,
            'password': password,
        }
        self.con = None
        self._get_con()

    def _get_con(self):
        """Open the connection and store it on ``self.con``."""
        self.con = pymysql.connect(**self._settings)

    def insert_ratings(self, user_id, movie_id, rating, rating_time):
        """Insert one row into ``tb_ratings`` and commit it.

        The four values are passed as query parameters, in table column
        order. If executing or committing fails, the transaction is rolled
        back so the shared connection stays usable, and the original
        exception is re-raised to the caller.
        """
        try:
            with self.con.cursor() as cursor:
                result = cursor.execute(
                    'insert into tb_ratings values (%s, %s, %s, %s)',
                    (user_id, movie_id, rating, rating_time)
                )
            if result == 1:
                print('添加成功!')
            self.con.commit()
        except Exception:
            # Do not leave a half-finished transaction on a connection that
            # is reused for every following row.
            self.con.rollback()
            raise
        finally:
            # Visual separator printed after every attempt, successful or not.
            print('!!!!!!' * 20)

    def close(self):
        """Close the connection if it is open; safe to call more than once."""
        if self.con is not None:
            self.con.close()
            self.con = None

    def __enter__(self):
        """Return the object itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection when the ``with`` block ends."""
        self.close()
def ratings2sql(path='ratings.dat', db=None):
    """Insert every rating from a ``::``-separated ratings file into MySQL.

    Each non-blank line is split on ``'::'`` and its first four fields are
    passed to ``db.insert_ratings`` in file order. The parsed fields and the
    running number of inserted rows are printed for each line as progress
    output.

    Args:
        path: File to read. Defaults to ``'ratings.dat'`` in the current
            working directory.
        db: Object providing ``insert_ratings(user_id, movie_id, rating,
            rating_time)``. When omitted, a new ``DB()`` is created for the
            duration of the call and its connection is closed afterwards;
            a caller-supplied object is left open.

    Returns:
        The number of rows passed to ``db.insert_ratings``.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
    """
    owns_db = db is None
    inserted = 0
    with open(path, 'r', encoding='utf-8') as f:
        if owns_db:
            db = DB()
        try:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank line is not end-of-file; skip it rather than
                    # silently dropping every row that follows.
                    continue
                data = line.split('::')
                print(data)
                db.insert_ratings(data[0], data[1], data[2], data[3])
                inserted += 1
                print(inserted)
        finally:
            # Only release a connection this call opened itself.
            if owns_db and getattr(db, 'con', None) is not None:
                db.con.close()
    return inserted
# Script entry point: run the import only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    ratings2sql()