今天写个宏玩,结果发现一个可能是编码引发的问题,完全不知道该怎么处理。
import pandas as pd
import numpy as np
import pymongo
# Reproduction script: a query dict that has been round-tripped through a
# pandas DataFrame is compared with the hand-written dict it was built from.
# Requires a MongoDB server listening on localhost:27017.
myclient = pymongo.MongoClient(host='mongodb://localhost:27017/')
mydbs = myclient['db_WebCrwr']
mycll = mydbs['ttt']

# lst_test1 is written by hand; lst_test2 is the same record after going
# list -> DataFrame -> list of dicts.
lst_test1 = [{'A': 52}]
dtf_test1 = pd.DataFrame(lst_test1)
lst_test2 = dtf_test1[['A']].to_dict(orient='records')

print("lst_test1[0] = ", lst_test1[0])
print("lst_test2[0] = ", lst_test2[0], '\n')

# Container types. The first line previously printed type(lst_test2[0])
# under the lst_test1 label, so it never inspected lst_test1 at all.
print("type of lst_test1[0]: ", type(lst_test1[0]))
print("type of lst_test2[0]: ", type(lst_test2[0]), '\n')

# Value types. The records print identically above, so any difference
# between them can only show up in the type of the stored value.
print("type of lst_test1[0]['A']: ", type(lst_test1[0]['A']))
print("type of lst_test2[0]['A']: ", type(lst_test2[0]['A']), '\n')

print("result of lst_test1[0]: ", mycll.find_one(lst_test1[0]))
# In the run recorded in this post, the next call raised
# "InvalidDocument: Cannot encode object: 52". The post attributes this to
# the value having become numpy.int64 after the DataFrame round trip.
# Workaround: convert values back to built-in types before querying,
# e.g. with int(value) or value.item().
print("result of lst_test2[0]: ", mycll.find_one(lst_test2[0]))
即:直接输入的列表 lst_test1 可以正常使用 pymongo 的 find_one 找到 mongodb 库表中已经存在的对应 document;但是把它转成数据框、再转回列表得到 lst_test2 之后,就不能正常执行 find_one 了,会报编码错误(InvalidDocument: Cannot encode object)。
运行结果如下:
原因:pandas 将字典转成数据框再转回来的时候,原来类型为 int 的数值变成了 numpy.int64,而 pymongo 无法对 numpy.int64 进行编码(encode)。解决办法是在查询前把数值转回 Python 原生类型,例如 int(v) 或 v.item()。解答参考:
https://segmentfault.com/q/1010000007300393/a-1020000012847675
具体的报错信息如下:
InvalidDocument Traceback (most recent call last)
in ()
26
27 print(mycll.find_one(lst_test1[0]))
---> 28 print(mycll.find_one(lst_test2[0]))
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\collection.py in find_one(self, filter, *args, **kwargs)
1260
1261 cursor = self.find(filter, *args, **kwargs)
-> 1262 for result in cursor.limit(-1):
1263 return result
1264 return None
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\cursor.py in next(self)
1187 if self.__empty:
1188 raise StopIteration
-> 1189 if len(self.__data) or self._refresh():
1190 if self.__manipulate:
1191 _db = self.__collection.database
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\cursor.py in _refresh(self)
1102 self.__session,
1103 self.__collection.database.client)
-> 1104 self.__send_message(q)
1105 elif self.__id: # Get More
1106 if self.__limit:
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\cursor.py in __send_message(self, operation)
929 try:
930 response = client._send_message_with_response(
--> 931 operation, exhaust=self.__exhaust, address=self.__address)
932 self.__address = response.address
933 if self.__exhaust:
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\mongo_client.py in _send_message_with_response(self, operation, exhaust, address)
1143 self.__all_credentials,
1144 self._event_listeners,
-> 1145 exhaust)
1146
1147 def _reset_on_error(self, server, func, *args, **kwargs):
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\mongo_client.py in _reset_on_error(self, server, func, *args, **kwargs)
1154 """
1155 try:
-> 1156 return func(*args, **kwargs)
1157 except NetworkTimeout:
1158 # The socket has been closed. Don't reset the server.
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\server.py in send_message_with_response(self, operation, set_slave_okay, all_credentials, listeners, exhaust)
92 use_find_cmd = operation.use_command(sock_info, exhaust)
93 message = operation.get_message(
---> 94 set_slave_okay, sock_info, use_find_cmd)
95 request_id, data, max_doc_size = self._split_message(message)
96
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\message.py in get_message(self, set_slave_ok, sock_info, use_cmd)
320 0, spec, self.db, self.read_preference,
321 set_slave_ok, False, self.codec_options,
--> 322 ctx=sock_info.compression_context)
323 return request_id, msg, size
324 ns = _UJOIN % (self.db, "$cmd")
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\message.py in _op_msg(flags, command, dbname, read_preference, slave_ok, check_keys, opts, ctx)
677 flags, command, identifier, docs, check_keys, opts, ctx)
678 return _op_msg_uncompressed(
--> 679 flags, command, identifier, docs, check_keys, opts)
680 finally:
681 # Add the field back to the command.
InvalidDocument: Cannot encode object: 52