用单线程爬取网站太慢,因此改用线程池进行多线程爬取。在多线程环境下,数据库插入需要使用 SQLAlchemy 的 scoped_session,让每个线程使用各自独立的 session。
下面直接贴代码:
class DBEngine(object):
    """Persistence helper for crawled train data, usable from a thread pool.

    All database access goes through a ``scoped_session`` registry, so each
    calling thread transparently gets (and reuses) its own session instead of
    sharing a single one across worker threads.
    """

    def __init__(self):
        """Create the SQLite engine and the scoped session registry."""
        self.engine = create_engine('sqlite:///train.sqlite', echo=False)
        self._dbSession = scoped_session(
            sessionmaker(
                autocommit=False,
                autoflush=False,
                bind=self.engine,
            )
        )

    @staticmethod
    def _to_text(value):
        """Return *value* as text, decoding UTF-8 only when it is ``bytes``.

        Values that are already text are passed through untouched; calling
        ``.decode`` on them would fail (always on Python 3, and for non-ASCII
        ``unicode`` on Python 2).
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def _commit(self):
        """Commit the current scoped session, rolling back if the commit fails.

        Without the rollback a failed commit leaves the session in an invalid
        transaction state, and every later call made through the same scoped
        session would fail as well. The original exception is re-raised.
        """
        session = self._dbSession()
        try:
            session.commit()
        except Exception:
            session.rollback()
            raise

    def closeDB(self):
        """Close and discard the current scope's session.

        ``remove()`` closes the session and also drops it from the registry,
        so the next call through the registry starts from a fresh session.
        """
        self._dbSession.remove()

    def saveTrainCode(self, code):
        """Insert a ``DBTrainCode`` row for *code* unless one already exists."""
        # Compare against None explicitly: an id of 0 is still a valid hit.
        if self.getTrainCodeIdWithCode(code) is None:
            self._dbSession().add(DBTrainCode(code))
        self._commit()

    def getTrainCodeIdWithCode(self, code):
        """Return the id of the ``DBTrainCode`` row matching *code*, or None."""
        dbc = (
            self._dbSession()
            .query(DBTrainCode)
            .filter(DBTrainCode.code == code)
            .first()
        )
        if dbc is None:
            return None
        return dbc.id

    def saveTrainListWithCode(self, tcid, no, city, arrive_time, start_time,
                              stop_time, run_time, kilometer,
                              superprice=0,
                              softsleeperpriceup=0,
                              softsleeperpricedown=0,
                              hardsleeperpriceup=0,
                              hardsleeperpricemiddle=0,
                              hardsleeperpricedown=0,
                              secondprice=0,
                              firstprice=0,
                              hardsitprice=0,
                              softsitprice=0):
        """Insert a ``DBTrainList`` row unless an equivalent one exists.

        A row counts as existing when ``no``, ``traincode`` (*tcid*) and
        ``city`` all match. The textual fields (*city*, *arrive_time*,
        *start_time*, *stop_time*, *run_time*) may be given as UTF-8 bytes or
        as already-decoded text. The remaining arguments are forwarded to the
        ``DBTrainList`` constructor unchanged.
        """
        city = self._to_text(city)
        arrive_time = self._to_text(arrive_time)
        start_time = self._to_text(start_time)
        stop_time = self._to_text(stop_time)
        run_time = self._to_text(run_time)

        session = self._dbSession()
        dbtl = (
            session.query(DBTrainList)
            .filter(
                DBTrainList.no == no,
                DBTrainList.traincode == tcid,
                DBTrainList.city == city,
            )
            .first()
        )
        if dbtl is None:
            dbtl = DBTrainList(
                tcid, no, city, arrive_time, start_time, stop_time, run_time,
                kilometer, superprice, softsleeperpriceup,
                softsleeperpricedown, hardsleeperpriceup,
                hardsleeperpricemiddle, hardsleeperpricedown, secondprice,
                firstprice, hardsitprice, softsitprice,
            )
            session.add(dbtl)
        self._commit()

    def getTrainListWithCode(self, code):
        """Return all ``DBTrainList`` rows whose ``traincode`` equals *code*.

        NOTE(review): ``saveTrainListWithCode`` stores the train-code id in
        ``traincode``, so *code* is presumably that id rather than the code
        string -- confirm against callers.
        """
        return (
            self._dbSession()
            .query(DBTrainList)
            .filter(DBTrainList.traincode == code)
            .all()
        )