Python +Mysql 简单安装部署步骤:
下载 mysql-5.5.61-winx64,按提示默认安装,相关的安装目录 :
g:\MySQL Datafiles\
C:\ProgramData\Microsoft\Windows\Start Menu\Programs\MySQL\MySQL Server 5.5
C:\Program Files\MySQL\MySQL Server 5.5\bin
创建数据库及更改权限:
create database demo;
grant all on *.* to root@localhost identified by '';
exit
导入数据库文件:
mysql -u root -p demo < all_gzdata.sql
mysql -u root -p demo
mysql> show tables;
运行python操作数据库的代码:
import pandas as pd
import pymysql

# Register PyMySQL under the module name "MySQLdb" *before* MySQLdb is
# imported; otherwise the import below fails with ImportError on machines
# where the real MySQLdb (mysqlclient) package is not installed.
pymysql.install_as_MySQLdb()

import MySQLdb  # noqa: E402  (must come after install_as_MySQLdb)
from sqlalchemy import create_engine  # noqa: E402
def page199(i):  # custom statistics function
    """Classify the titles of category-199 pages whose URL has a query string.

    Parameters
    ----------
    i : pandas.DataFrame
        One chunk of page-view records. The columns ``fullURLId``,
        ``fullURL`` and ``pageTitle`` are read; ``i`` itself is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``fullURL``, ``pageTitle`` and ``type``
        for the rows whose ``fullURLId`` contains ``'199'`` and whose
        ``fullURL`` contains ``'?'``. Missing titles are replaced by ``'空'``.
        ``type`` defaults to ``'其他'`` and is overwritten by the rules below;
        when several rules match, the last matching rule wins.
    """
    # astype(str) lets the filter work even if the ID column was read as a
    # number; na=False makes missing values count as "no match" instead of
    # producing a NaN inside the boolean mask.
    is_199 = i['fullURLId'].astype(str).str.contains('199', regex=False, na=False)
    has_query = i['fullURL'].astype(str).str.contains('?', regex=False, na=False)

    # Explicit copy: every write below goes to our own frame, not to a view of
    # the caller's chunk (chained assignment is a no-op under Copy-on-Write).
    j = i.loc[is_199 & has_query, ['fullURL', 'pageTitle']].copy()
    j['pageTitle'] = j['pageTitle'].fillna(u'空')
    j['type'] = u'其他'  # default label

    titles = j['pageTitle'].astype(str)

    def has(text):
        # Literal (non-regex) substring test against the page title.
        return titles.str.contains(text, regex=False, na=False)

    # Order matters: later assignments override earlier ones.
    j.loc[has(u'法律快车-律师助手'), 'type'] = u'法律快车-律师助手'
    j.loc[has(u'咨询发布成功'), 'type'] = u'咨询发布成功'
    j.loc[has(u'免费发布法律咨询'), 'type'] = u'免费发布法律咨询'
    j.loc[has(u'法律快搜'), 'type'] = u'快搜'
    j.loc[has(u'法律快车法律经验'), 'type'] = u'法律快车法律经验'
    j.loc[has(u'法律快车法律咨询'), 'type'] = u'法律快车法律咨询'
    j.loc[has(u'_法律快车') | has(u'-法律快车'), 'type'] = u'法律快车'
    j.loc[has(u'空'), 'type'] = u'空'
    return j
# Note (translated from the author): each time the sql object is obtained,
# the database has to be accessed again.
engine = create_engine('mysql+pymysql://root:@127.0.0.1:3306/demo?charset=utf8mb4')

# Read the table from the database in chunks of 10000 rows.
sql = pd.read_sql('all_gzdata', engine, chunksize=10000)

# Classify every chunk with page199 and stitch the partial results together.
counts4 = pd.concat([page199(chunk) for chunk in sql])

# Number of rows per assigned type.
d1 = counts4['type'].value_counts()
print(d1)

# Rows that none of the title rules matched (type is still '其他').
d2 = counts4.loc[counts4['type'] == u'其他']
print(d2)
d2 = pd.DataFrame(d2)
# Helper that writes a table into the database, for intermediate results that
# need to be persisted while the analysis runs.
def savetosql(DF, tablename):
    """Append ``DF`` to the table ``tablename`` in the ``demo`` schema.

    Parameters
    ----------
    DF : pandas.DataFrame
        Data to write; passed unchanged to ``DataFrame.to_sql``.
    tablename : str
        Name of the target table. With ``if_exists='append'`` the rows are
        added to an existing table.

    Notes
    -----
    A new SQLAlchemy engine is created on every call and disposed afterwards,
    so no pooled connections are left open. The URL names the PyMySQL driver
    directly, so the function does not depend on
    ``pymysql.install_as_MySQLdb()`` having been called first.
    """
    from sqlalchemy import create_engine

    yconnect = create_engine('mysql+pymysql://root:@127.0.0.1:3306/demo?charset=utf8mb4')
    try:
        # Public API equivalent of the former pd.io.sql.to_sql(...) call.
        DF.to_sql(tablename, con=yconnect, schema='demo', if_exists='append')
    finally:
        yconnect.dispose()
savetosql(d2,'199elsepercentage')# Store the rows of type "其他" (other) from the 199 pages in the database
python写入数据库报错:
File "", line 40, in
savetosql(d2,'199elsePercentage')
File "", line 38, in savetosql
pd.io.sql.to_sql(DF,tablename, yconnect, schema='demo', if_exists='append')
File "G:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py", line 450, in to_sql
chunksize=chunksize, dtype=dtype)
File "G:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py", line 1149, in to_sql
table.insert(chunksize)
File "G:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py", line 663, in insert
self._execute_insert(conn, keys, chunk_iter)
File "G:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py", line 638, in _execute_insert
conn.execute(*self.insert_statement(data, conn))
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 948, in execute
return meth(self, multiparams, params)
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py", line 269, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1060, in _execute_clauseelement
compiled_sql, distilled_params
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1200, in _execute_context
context)
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1413, in _handle_dbapi_exception
exc_info
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py", line 203, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause)
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py", line 186, in reraise
raise value.with_traceback(tb)
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1193, in _execute_context
context)
File "G:\ProgramData\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py", line 507, in do_execute
cursor.execute(statement, parameters)
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\cursors.py", line 170, in execute
result = self._query(query)
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\cursors.py", line 328, in _query
conn.query(q)
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\connections.py", line 516, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\connections.py", line 727, in _read_query_result
result.read()
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\connections.py", line 1066, in read
first_packet = self.connection._read_packet()
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\connections.py", line 683, in _read_packet
packet.check_error()
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "G:\ProgramData\Anaconda3\lib\site-packages\pymysql\err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
InternalError: (pymysql.err.InternalError) (1366, "Incorrect string value: '...' for column '...' at row 1") [SQL: 'INSERT INTO demo.`199elsePercentage`......
mysql 字符集编码出错,检查mysql字符集:
mysql> show variables like 'character_set_database';
+------------------------+--------+
| Variable_name | Value |
+------------------------+--------+
| character_set_database | latin1 |
+------------------------+--------+
1 row in set (0.00 sec)
mysql> show variables like 'character%';
+--------------------------+---------------------------------------------------------+
| Variable_name | Value |
+--------------------------+---------------------------------------------------------+
| character_set_client | utf8mb4 |
| character_set_connection | utf8mb4 |
| character_set_database | latin1 |
| character_set_filesystem | binary |
| character_set_results | utf8mb4 |
| character_set_server | utf8mb4 |
| character_set_system | utf8 |
| character_sets_dir | C:\Program Files\MySQL\MySQL Server 5.5\share\charsets\ |
+--------------------------+---------------------------------------------------------+
8 rows in set (0.00 sec)
执行 alter database demo CHARACTER SET utf8mb4; 更改数据库编码后,插入仍然失败。该语句只修改数据库的默认字符集,只对之后新建的表生效,已存在的表不会被转换:
mysql> alter database demo CHARACTER SET utf8mb4;
Query OK, 1 row affected (0.00 sec)
mysql> show variables like 'character%';
+--------------------------+---------------------------------------------------------+
| Variable_name | Value |
+--------------------------+---------------------------------------------------------+
| character_set_client | utf8mb4 |
| character_set_connection | utf8mb4 |
| character_set_database | utf8mb4 |
| character_set_filesystem | binary |
| character_set_results | utf8mb4 |
| character_set_server | utf8mb4 |
| character_set_system | utf8 |
| character_sets_dir | C:\Program Files\MySQL\MySQL Server 5.5\share\charsets\ |
+--------------------------+---------------------------------------------------------+
8 rows in set (0.00 sec)
mysql> show variables like 'character_set_database';
+------------------------+---------+
| Variable_name | Value |
+------------------------+---------+
| character_set_database | utf8mb4 |
+------------------------+---------+
修改my.cnf文件(C:\Program Files\MySQL\MySQL Server 5.5),将编码改为utf8mb4,在windows服务中重启数据库
[mysql]
#default-character-set=latin1
default-character-set=utf8mb4
[mysqld]
#character-set-server=latin1
character-set-server=utf8mb4
collation-server = utf8mb4_unicode_ci
init_connect='SET NAMES utf8mb4'
MySQL 中已有的数据库和表,使用以下语句更改字符集:
mysql> ALTER DATABASE demo CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
Query OK, 1 row affected (0.00 sec)
mysql>
mysql> ALTER TABLE 199elsePercentage CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
Query OK, 0 rows affected (0.30 sec)
Records: 0 Duplicates: 0 Warnings: 0
编码更改以后,mysql数据库插入成功:
mysql> select count(1) from 199elsepercentage;
+----------+
| count(1) |
+----------+
| 359 |
+----------+
1 row in set (0.00 sec)