这是一个链接,现在我想按我的需求(中文关键词),从这个网页把我需要的相关信息抓取下来。How to do it? Just follow me.
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'test.ui'
#
# Created by: PyQt4 UI code generator 4.11.4
#
# WARNING! All changes made in this file will be lost!
from PyQt4 import QtCore, QtGui
# Compatibility shims used by the generated UI code below.

try:
    # When QtCore exposes QString, route UTF-8 text through its converter.
    _fromUtf8 = QtCore.QString.fromUtf8
except AttributeError:
    # QString is not available: hand the string back unchanged.
    def _fromUtf8(s):
        return s

try:
    _encoding = QtGui.QApplication.UnicodeUTF8
except AttributeError:
    # No UnicodeUTF8 flag: call translate() without an encoding argument.
    def _translate(context, text, disambig):
        return QtGui.QApplication.translate(context, text, disambig)
else:
    # The flag exists, so pass it through on every translate() call.
    def _translate(context, text, disambig):
        return QtGui.QApplication.translate(context, text, disambig, _encoding)
class Ui_Rockson(object):
    """Widget layout for the "Rockson" dialog.

    The dialog holds a column of three captions, a column of three line
    edits (year, keyword 1, keyword 2) and a Cancel/OK button row.
    """

    def setupUi(self, Rockson):
        """Create all child widgets on ``Rockson`` and connect its buttons.

        ``Rockson`` is the dialog that becomes the parent of every widget;
        it is named, resized to 440x153 and set non-modal here.  Each
        widget is also stored on ``self`` under its object name.
        """
        Rockson.setObjectName(_fromUtf8("Rockson"))
        Rockson.resize(440, 153)
        Rockson.setModal(False)

        # Horizontal Cancel/OK button row.
        button_box = QtGui.QDialogButtonBox(Rockson)
        button_box.setGeometry(QtCore.QRect(-30, 100, 341, 32))
        button_box.setOrientation(QtCore.Qt.Horizontal)
        button_box.setStandardButtons(
            QtGui.QDialogButtonBox.Cancel | QtGui.QDialogButtonBox.Ok)
        button_box.setObjectName(_fromUtf8("buttonBox"))
        self.buttonBox = button_box

        # Left column: three captions stacked vertically.
        caption_column = QtGui.QWidget(Rockson)
        caption_column.setGeometry(QtCore.QRect(90, 20, 81, 71))
        caption_column.setObjectName(_fromUtf8("widget"))
        self.widget = caption_column

        caption_layout = QtGui.QVBoxLayout(caption_column)
        caption_layout.setObjectName(_fromUtf8("verticalLayout_2"))
        self.verticalLayout_2 = caption_layout

        for attr_name in ("label_3", "label", "label_2"):
            caption = QtGui.QLabel(caption_column)
            caption.setObjectName(_fromUtf8(attr_name))
            caption_layout.addWidget(caption)
            setattr(self, attr_name, caption)

        # Right column: the three line edits, in the same vertical order.
        edit_column = QtGui.QWidget(Rockson)
        edit_column.setGeometry(QtCore.QRect(170, 20, 135, 74))
        edit_column.setObjectName(_fromUtf8("widget1"))
        self.widget1 = edit_column

        edit_layout = QtGui.QVBoxLayout(edit_column)
        edit_layout.setObjectName(_fromUtf8("verticalLayout"))
        self.verticalLayout = edit_layout

        # The year field is the only edit with an explicit context-menu policy.
        year_edit = QtGui.QLineEdit(edit_column)
        year_edit.setContextMenuPolicy(QtCore.Qt.DefaultContextMenu)
        year_edit.setObjectName(_fromUtf8("year"))
        edit_layout.addWidget(year_edit)
        self.year = year_edit

        for attr_name in ("keyword1", "keyword2"):
            keyword_edit = QtGui.QLineEdit(edit_column)
            keyword_edit.setObjectName(_fromUtf8(attr_name))
            edit_layout.addWidget(keyword_edit)
            setattr(self, attr_name, keyword_edit)

        # Texts are applied only after every widget exists.
        self.retranslateUi(Rockson)

        # OK accepts the dialog, Cancel rejects it.
        button_box.accepted.connect(Rockson.accept)
        button_box.rejected.connect(Rockson.reject)
        QtCore.QMetaObject.connectSlotsByName(Rockson)

        # Tab order: year -> keyword1 -> keyword2 -> buttons.
        focus_chain = (self.year, self.keyword1, self.keyword2, button_box)
        for current, following in zip(focus_chain, focus_chain[1:]):
            Rockson.setTabOrder(current, following)

    def retranslateUi(self, Rockson):
        """Set the window title, the caption texts and the default field values."""
        Rockson.setWindowTitle(_translate("Rockson", "Dialog", None))
        texts = (
            (self.label_3, "年份"),
            (self.label, "关键词1"),
            (self.label_2, "关键词2"),
            (self.year, "2015"),
            (self.keyword1, "计算"),
            (self.keyword2, "数学"),
        )
        for target, text in texts:
            target.setText(_translate("Rockson", text, None))
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Collect summer-camp postings that contain two keywords and save them.

A PyQt4 dialog asks for a year and two keywords.  The page registered for
that year is downloaded, every ``<font size="4">`` element whose text
contains both keywords is printed, and the matches are written to a UTF-8
text file on drive D:.

Written for Python 2.7 (it relies on ``urllib2``, ``unicode`` and
``reload``).
"""
import io
import re  # noqa: F401 -- imported by the original script, unused here
import sys
import urllib  # noqa: F401 -- imported by the original script, unused here
import urllib2
from contextlib import closing

from bs4 import BeautifulSoup
from PyQt4 import QtCore  # noqa: F401 -- not referenced directly in this script
from PyQt4.QtGui import QApplication, QDialog

import test  # the generated UI module (test.py) that defines Ui_Rockson

#---------------------------------------
BANNER = u"""#--------------------------------------- # 程序:保研夏令营信息收集汇总 # 版本:0.5 # 作者:陆嵩 # 日期:2016-4-20 # 语言:Python 2.7 # 操作:输入年份、关键词1和关键词2,若你只有一个关键词,请将两个关键词填写成一样 # 功能:将信息打包txt存储到本地,目前仅支持2015年。若不能生成,请重试一次。 若要2016实时数据,请联系本人@qq962907540 #--------------------------------------- """

# Page to scrape for each year the dialog may be given.
SOURCE_URLS = {
    u"2015": 'http://www.eeban.com/forum.php?mod=viewthread&tid=325630&page=1&_dsign=69293742',
    u"2014": 'http://page.renren.com/baoyanluntan/note/930842040',
}

# Directory the result file is written to.
OUTPUT_DIR = u'D:\\'


class TestDialog(QDialog, test.Ui_Rockson):
    """Dialog that hosts the widgets built by ``test.Ui_Rockson.setupUi``."""

    def __init__(self, parent=None):
        super(TestDialog, self).__init__(parent)
        self.setupUi(self)


def _ask_query():
    """Show the dialog and return ``(year, keyword1, keyword2)``.

    Returns ``None`` when the dialog is cancelled or closed, so the caller
    does not go on to download and write anything.
    """
    # The QApplication object must stay referenced while the dialog runs.
    app = QApplication(sys.argv)  # noqa: F841
    dialog = TestDialog()
    if dialog.exec_() != QDialog.Accepted:
        return None
    # Convert to plain unicode up front so later concatenation and ``in``
    # tests never mix Qt string objects with Python strings.
    fields = (dialog.year, dialog.keyword1, dialog.keyword2)
    return tuple(unicode(field.text()).strip() for field in fields)


def _fetch_entries(url):
    """Download ``url`` and return the text of every ``<font size="4">`` tag."""
    with closing(urllib2.urlopen(url)) as response:
        page = response.read()
    # NOTE(review): no parser is named, so bs4 uses whichever one is
    # installed; pin one explicitly once the intended parser is confirmed.
    soup = BeautifulSoup(page, from_encoding="utf8")
    return [font.text for font in soup.find_all('font', size='4')]


def main():
    """Run the dialog, scrape the page and write the matches; return an exit code."""
    # HACK (Python 2 only): kept from the original so that implicit
    # unicode/str conversions keep defaulting to UTF-8.  It now runs before
    # the first print instead of after it.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    print(BANNER)
    print(u'完成后数据保存D盘,请注意查看。')

    query = _ask_query()
    if query is None:
        return 0
    year, keyword1, keyword2 = query

    url = SOURCE_URLS.get(year)
    if url is None:
        # Previously an unknown year fell through to a NameError on ``url``.
        print(u'暂不支持该年份:' + year)
        return 1

    entries = _fetch_entries(url)

    file_name = year + u'年夏令营信息汇总' + keyword1 + u'-' + keyword2
    output_path = OUTPUT_DIR + file_name + u'.txt'
    # io.open encodes explicitly and the with-block closes the file even if
    # printing or writing fails part-way through.
    with io.open(output_path, 'w', encoding='utf-8') as output:
        for text in entries:
            if keyword1 in text and keyword2 in text:
                print(text + u"\n\n")
                output.write(text + u"\n\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
#print "\n\n\n\n\n\n\n\n\n\n"
#trs=soup.find_all('tr')
#for tr in trs:
#if unicode(u"数学") in tr.text:
#if unicode(u"计算") in tr.text:
# print tr.text+"\n\n"
#print font.find('font').text+"\n"+font.find('a')['href']+"\n\n"
#def conditions(tag):
# flag=unicode(u"5L") in tag.text;
# return flag
#print ref
#print(soup.prettify())
#td=soup.find('td',id="postmessage_1910688")
#fonts=td.find_all('font')
#for font in fonts:
# if type(font.previous_sibling)=='bs4.element.Tag':
# print font.previous_sibling.text+u"\n"
# print font.text+u"\n\n"
#print str(tags.contents)
#for per_tag in td:
#if unicode(u"数学") in per_tag.text:
# print per_tag.text+u"\n\n\n\n\n"
#if unicode(u"厦门大学") in per_tag.text:
# print per_tag.text+"\n"
#print ref
setup.py py2exe
# setup.py -- build main.py into a console executable with py2exe.
from distutils.core import setup

import py2exe

setup(
    console=[{"script": "main.py"}],
    # "sip" is listed explicitly so the built exe does not complain that
    # the sip module is missing.
    options={"py2exe": {"includes": ["sip"]}},
)
一、当使用 PyQt 做了界面时,把 py 转换成 exe 所用的 setup.py 一般如下(在 includes 中加入 sip);否则,用终端方式打开生成的 exe 时会提示缺少 sip 文件。
from distutils.core import setup

import py2exe

# Extra modules py2exe is told to bundle into the executable.
py2exe_options = {"includes": ["sip"]}

# main.py is built as a console program.
setup(
    console=[{"script": "main.py"}],
    options={"py2exe": py2exe_options},
)
而一般情况下(不需要额外包含 sip 时),setup.py 是这样写的:
# Minimal setup.py: build main.py into a console executable with py2exe.
from distutils.core import setup
import py2exe
setup(console=['main.py']) # use the `windows` keyword instead of `console` and the DOS console window does not appear
二、生成 exe 的过程中可能还会遇到其他问题,比如计算机缺少某个 dll 文件等,这时需要自行下载并注册相关 dll。以下是注册 msvcp90.dll 的批处理文件代码。
@echo off
rem Copy msvcp90.dll from the current directory into system32 and register it.
rem Success is reported only when both steps succeed; before, the success
rem message was printed unconditionally.
rem NOTE(review): confirm that msvcp90.dll actually needs regsvr32; if it has
rem no registration entry point, the copy alone may be what fixes the build.

echo 开始注册

copy "msvcp90.dll" "%windir%\system32\"
if errorlevel 1 goto copy_failed

regsvr32 /s "%windir%\system32\msvcp90.dll"
if errorlevel 1 goto register_failed

echo msvcp90.dll注册成功
goto finished

:copy_failed
echo msvcp90.dll复制失败
goto finished

:register_failed
echo msvcp90.dll注册失败

:finished
pause
三、说起 bat 文件,我们在终端测试时,不必每次都重新打开命令行窗口输入命令:可以把命令写进一个记事本文件,再把后缀改为 .bat,双击运行即可。
四、有时候会提示编码出错,不能进行转换,这时用 Notepad++ 打开源文件,另存为所需的编码格式(例如 UTF-8)即可。