#coding=utf-8
"""Airline-news crawler (Python 2).

Scrapes news pages of Chinese airlines with PyQuery and stores rows in the
MySQL table `jipiaonews`.  Each per-airline function opens its own DB
connection, walks the site's news list, and stops when it reaches a URL that
is already stored or a post older than ``earliestTime``.
NOTE(review): requires Python 2 (print statements, urllib2, MySQLdb) and a
local MySQL instance; network layout of the target sites is assumed as of
the time this was written.
"""
import urllib
import time
import re
import threading
import string
import urllib2
import chardet
import MySQLdb
import base64
import os
import sys
from pyquery import PyQuery

reload(sys)
sys.setdefaultencoding("utf-8")

# Earliest news date to keep; posts dated before this are skipped.
earliestTime = "2010-06-01"

# Guards stdout, shared by all crawler threads.
mutex = threading.Lock()


def printf(obj):
    """Thread-safe print for use from multiple crawler threads."""
    global mutex
    mutex.acquire()
    print obj
    mutex.release()


def mysqlDataBase_init(database="jipiao"):
    """Connect to the local MySQL server; return (conn, cursor) set to utf8."""
    conn = MySQLdb.connect(host="localhost", user="root", passwd="",
                           db=database,
                           unix_socket="/opt/lampp/var/mysql/mysql.sock")
    cursor = conn.cursor()
    cursor.execute("SET NAMES utf8")
    cursor.execute("SET CHARACTER_SET_CLIENT=utf8")
    cursor.execute("SET CHARACTER_SET_RESULTS=utf8")
    conn.commit()
    return (conn, cursor)


def countNewsUrl(cursor, newsUrl):
    "Return how many rows already exist for newsUrl."
    # NOTE(review): SQL built by string interpolation -- vulnerable to
    # injection / breakage if a URL ever contains a quote.
    selectSql = "select count(*) from jipiaonews where newsUrl='%s'" % newsUrl
    cursor.execute(selectSql)
    result = cursor.fetchone()
    return result[0]
    pass


def checkNewsUrl(cursor, newsUrl):
    """Return 1 if newsUrl is already stored (crawlers use this as the
    stop condition); otherwise print the URL as progress and return 0."""
    # URL already stored -- no need to crawl it again.
    if countNewsUrl(cursor, newsUrl) > 0:
        return 1
    # Thread-safe progress output.
    global mutex
    mutex.acquire()
    print newsUrl
    mutex.release()
    return 0


def executeSql(cursor, title, postDate, content, airport, source, newsUrl):
    """Insert one news row into jipiaonews (does not commit)."""
    # Strip quotes that would break the hand-built SQL string below.
    content = content.replace("'", "")
    content = content.replace("\"", "")
    insertsql = ("insert into jipiaonews (title,postDate,content,airport,source,newsUrl)"
                 " values('%s','%s','%s','%s','%s','%s')"
                 % (title, postDate, content, airport, source, newsUrl))
    insertsql = insertsql.encode("utf-8")
    cursor.execute(insertsql)
    pass


def parseDate(postDate):
    """Convert a Chinese date such as 2010年6月1日 into 2010-6-1."""
    return postDate.replace("年", "-").replace("月", "-").replace("日", "")
    pass


def xiamenhangkong():
    """Xiamen Airlines news crawler."""
    airport = "厦门航空"
    matchPostDate = re.compile(r"[\d]+-[\d]+-[\d]")
    conn, cursor = mysqlDataBase_init()
    page = 1
    while page != -1:
        newsPageUrl = "http://www.xiamenair.com.cn/newslist/news-%s.shtml" % page
        page += 1
        try:
            pq = PyQuery(newsPageUrl)
        except:
            # Page fetch failed -- treat as end of listing.
            break
        i = 0
        while True:
            a = pq("table.artlist a:eq(%s)" % i)
            i += 1
            newsUrl = a.attr("href")
            if not newsUrl:
                # Not even a first link on this page: stop the outer loop too.
                if i == 1:
                    page = -1
                break
            newsUrl = "http://www.xiamenair.com.cn" + newsUrl
            if checkNewsUrl(cursor, newsUrl):
                page = -1
                break
            title = a.text()
            tr = pq("table.artlist tr:eq(%s)" % i)
            postDate = matchPostDate.search(tr.text()).group()
            newpq = PyQuery(newsUrl)
            content = newpq("div.artcon").text()
            # Drop the leading "(...)" prefix if present.
            if content.find(")") != -1:
                content = content[content.find(")") + 1:]
            content = content.strip()
            source = ""
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            pass
        pass
    conn.commit()
    conn.close()
    # --- old implementation kept for reference ---
    # newsBasePage = "http://www.xiamenair.com.cn/about_news.aspx?Page=%s"
    # #由于新闻少,所以时间向前推了
    # ownEarliestTime = "2010-01-01"
    # #页码
    # pages = 1
    # while pages != -1:
    #     newsBaseUrl = newsBasePage % pages
    #     pages += 1
    #
    #     pq = PyQuery(newsBaseUrl)
    #     table = pq("div#header table:eq(25)")
    #     conn, cursor = mysqlDataBase_init()
    #     index = 0
    #     while True:
    #         a = table.find("a:eq(%s)" % index)
    #         index += 1
    #
    #         newsUrl = a.attr("href")
    #         if not newsUrl:
    #             break
    #         newsUrl = "http://www.xiamenair.com.cn/" + newsUrl
    #         title = a.text()
    #         if checkNewsUrl(cursor, newsUrl):
    #             pages=-1#结束
    #             break
    #         newspq = PyQuery(newsUrl)
    #         newsTable = newspq("table:eq(3)")
    #         postDate = newsTable("div:eq(1)").html()
    #         #函数终止条件
    #         if postDate < ownEarliestTime:
    #             pages = -1#结束
    #             break
    #         content = newsTable("table:eq(1)").text()
    #         source = ""
    #         postDate = postDate.encode("utf8")
    #
    #         executeSql(cursor, title, postDate, content, airport, source, newsUrl)
    #         pass
    #     conn.commit()
    #     conn.close()
    #     pass
    # pass


def chongqinghangkong():
    """Chongqing Airlines news crawler."""
    airport = "重庆航空"
    newsBaseUrl = "http://www.flycq.com/info/news/index.html"
    pq = PyQuery(newsBaseUrl)
    conn, cursor = mysqlDataBase_init()
    # Regex for the yymmdd date embedded in article URLs.
    patternDate = re.compile(r"[\d]{6}")
    i = 0
    while i != -1:
        a = pq("div#newsContent div ul li:eq(%s)" % i).find("a")
        newsUrl = a.attr("href")
        i += 1
        if not newsUrl:
            break
        # External links are not articles -- skip them.
        if newsUrl.startswith("http"):
            continue
        # Date comes from the URL itself ("20" + yymmdd).
        postDate = "20" + patternDate.search(newsUrl).group()
        # Some URLs need rewriting to the canonical news path.
        newsUrl = newsUrl[newsUrl.rfind('/') + 1:]
        try:
            newsUrl = "http://www.flycq.com/info/news/" + newsUrl
            if checkNewsUrl(cursor, newsUrl):
                i = -1  # stop
                break
            newspq = PyQuery(newsUrl)
        except urllib2.HTTPError:
            printf(newsUrl + " illegal url!")
            continue
        content = newspq("div#newsContent").text()
        # Strip Dreamweaver template markers.
        if content.find("InstanceBeginEditable") != -1:
            content = content[len("InstanceBeginEditable name=\"content\" "):]
        if content.find("InstanceEndEditable") != -1:
            content = content[:-len("InstanceEndEditable")]
        title = a.text()
        source = ""
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def hannanhangkong():
    """Hainan Airlines news crawler."""
    airport = "海南航空"
    matchUrl = re.compile(r"/.*\.html")
    matchPostDate = re.compile(r"[\d]{4}-[\d]+-[\d]+")
    page = 0
    while page != -1:
        # First page has no suffix; later pages are index_N.html.
        if page == 0:
            newsPageUrl = "http://www.hnair.com/gyhh/hhxw/index.html"
        else:
            newsPageUrl = "http://www.hnair.com/gyhh/hhxw/index_%s.html" % page
        page += 1
        conn, cursor = mysqlDataBase_init()
        pq = PyQuery(newsPageUrl)
        i = 0
        while True:
            a = pq("div.body a:eq(%s)" % i)
            # Article URL is embedded in the onclick handler.
            newsUrl = a.attr("onclick")
            if not newsUrl:
                break
            newsUrl = matchUrl.search(newsUrl).group()
            newsUrl = "http://www.hnair.com/gyhh/hhxw" + newsUrl
            li = pq("div.body li:eq(%s)" % i)
            i += 1
            postDate = matchPostDate.search(li.text()).group()
            if postDate < earliestTime:
                page = -1  # stop marker
                break
            if checkNewsUrl(cursor, newsUrl):
                page = -1  # stop marker
                break
            title = li.text()[:-(len(postDate) + 2)]
            newspq = PyQuery(newsUrl)
            content = newspq("div.body").text()
            # Drop the "本报讯 (...)" byline prefix when present.
            if content.find("本报讯") != -1 and content.find(")") != -1:
                content = content[content.find(")") + len(")"):]
            source = ""
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            pass
        conn.commit()
        conn.close()
    pass


def xingfuhangkong():
    """Joy Air news crawler -- walks article ids downward from the newest."""
    airport = "幸福航空"
    newsBasePage = "http://www.joy-air.com/AboutUs/NewsCenter/List.aspx?id=1"
    pq = PyQuery(newsBasePage)
    # Take the newest article's numeric id from the listing page.
    url = pq("div#newslist").find("a").attr("href")
    id = int(re.search(r"[\d]+", url).group())
    newsBaseUrl = "http://www.joy-air.com/AboutUs/NewsCenter/Detail.aspx?id="
    conn, cursor = mysqlDataBase_init()
    while True:
        newsUrl = newsBaseUrl + str(id)
        id -= 1
        if checkNewsUrl(cursor, newsUrl):
            break
        try:
            newspq = PyQuery(newsUrl)
        except urllib2.HTTPError:
            # Gaps in the id sequence are expected -- skip missing ids.
            continue
        postDate = newspq("div.dynamicmessagesbox span:eq(1)").text()
        # Termination condition: article is older than the cutoff.
        if postDate < earliestTime:
            break
        content = newspq("div.dynamicmessagesbox").text()
        title = newspq("div.dynamicmessagesbox span:eq(0)").text()
        source = ""
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def shenzhenhangkong():
    """Shenzhen Airlines news crawler (GBK-encoded pages)."""
    airport = "深圳航空"
    newsPageUrl = "http://www.shenzhenair.com/more.jsp?category=423afc988259499faa94bbaf8b6983b5&title=%E6%B7%B1%E8%88%AA%E6%96%B0%E9%97%BB&lan=1"
    sourceFile = urllib.urlopen(newsPageUrl).read()
    pq = PyQuery(sourceFile.decode("gbk"))
    conn, cursor = mysqlDataBase_init()
    newspqs = pq("table:eq(17)")
    # Titles carry a leading inline <script>; strip everything up to it.
    match = re.compile(r"</script>.*")
    i = 0
    while True:
        a = newspqs("a:eq(%s)" % i)
        i += 1
        newsUrl = a.attr("href")
        title = a.text()
        if not newsUrl:
            break
        newsUrl = newsUrl[:newsUrl.rfind("&")]
        newsUrl = "http://www.shenzhenair.com/" + newsUrl
        title = match.search(title).group()[len(r"</script>"):].strip()
        if checkNewsUrl(cursor, newsUrl):
            break
        sourceFile = urllib.urlopen(newsUrl).read()
        newspq = PyQuery(sourceFile.decode("gbk"))
        content = newspq("table:eq(2)").text()
        # No post date on the page -- fall back to today's date.
        postDate = time.strftime("%Y%m%d")
        source = ""
        # Keep only the text between the second 】 and the last 【.
        begin = content.find("】")
        begin = content.find("】", begin + 1)
        end = content.rfind("【")
        if begin == -1:
            begin = 0
        content = content[begin + 2:end]
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        # Be polite: pause every 20 requests.
        if i % 20 == 0:
            time.sleep(5)
        pass
    conn.commit()
    conn.close()
    pass


def chunqiuhangkong():
    """Spring Airlines crawler -- uses the news page, which carries even
    fewer items than the homepage."""
    airport = "春秋航空"
    newsPageUrl = "http://www.china-sss.com/Static/New_information"
    conn, cursor = mysqlDataBase_init()
    sourceFile = urllib.urlopen(newsPageUrl).read()
    pq = PyQuery(unicode(sourceFile, "utf-8"))
    i = 1
    while True:
        span = pq("div.New_information span:eq(%s)" % i)
        a = pq("div.New_information a:eq(%s)" % i)
        i += 1
        newsUrl = a.attr("href")
        if not newsUrl:
            break
        title = a.text()
        postDate = span.text()
        newsUrl = "http://www.china-sss.com" + newsUrl
        if checkNewsUrl(cursor, newsUrl):
            break
        sourceFile = urllib.urlopen(newsUrl).read()
        newspq = PyQuery(unicode(sourceFile, "utf-8"))
        content = newspq("dl#content_top dd").text()
        source = ""
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def dongfanghangkong():
    """China Eastern Airlines news crawler."""
    airport = "东方航空"
    newsPageUrl = "http://www.ce-air.com/mu/main/gydh/xwgg/index.html"
    index = 0
    while index != -1:
        if index != 0:
            newsPageUrl = ("http://www.ce-air.com/"
                           "mu/main/gydh/xwgg/index_%s.html" % index)
        index += 1  # next listing page
        pq = PyQuery(newsPageUrl)
        conn, cursor = mysqlDataBase_init()
        i = 0
        while True:
            a = pq("div.s_xw_list ul li:eq(%s)" % i).find("a")
            i += 1
            href = a.attr("href")
            if not href:
                break
            title = a.text()
            newsUrl = "http://www.ce-air.com/mu/main/gydh/xwgg/" + href[2:]
            # Date is the 8-digit run embedded in the URL.
            postDate = re.search(r"[\d]{8}", newsUrl).group()
            if postDate < earliestTime.replace("-", ""):
                index = -1  # stop outer loop
                break
            if checkNewsUrl(cursor, newsUrl):
                index = -1  # stop outer loop
                break
            newspq = PyQuery(newsUrl)
            # Try several selectors -- page layout varies between articles.
            content = newspq("div.body p:eq(1)").text()
            if content == None:
                content = newspq("div.body p:eq(0)").text()
            if len(content) == 0:
                content = newspq("div.body").text()
                content = content[content.rfind("}") + 1:]
            table = newspq("div.body table").text()
            source = re.search("'.*'", table).group()[1:-1]
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            pass
        conn.commit()
        conn.close()
        pass
    pass


def shanghaihangkong():
    """Shanghai Airlines crawler -- site has no real news, only bulletins.
    Also crawls the legacy news site in a second phase below."""
    airport = "上海航空"
    newsPageUrl = "http://www.shanghai-air.com/cmsweb/sale.aspx?pageIndex=0"
    sourceFile = urllib.urlopen(newsPageUrl).read()
    pq = PyQuery(unicode(sourceFile, "utf-8"))
    conn, cursor = mysqlDataBase_init()
    i = 0
    while True:
        a = pq("div.items li:eq(%s)" % i).find("a")
        i += 1
        newsUrl = a.attr("href")
        title = a.attr("title")
        if not newsUrl:
            break
        newsUrl = "http://www.shanghai-air.com/cmsweb/" + newsUrl
        if checkNewsUrl(cursor, newsUrl):
            break
        sourceFile = urllib.urlopen(newsUrl).read()
        newspq = PyQuery(unicode(sourceFile, "utf-8"))
        content = newspq("div.nowtext").text()
        if not content:
            content = ""
        # No post date on the page -- use today's date.
        postDate = time.strftime("%Y%m%d")
        source = ""
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass
    # Phase 2: legacy Shanghai Airlines news site.
    """上海航空旧版的新闻"""
    airport = "上海航空"
    newsPageUrl = "http://ww1.shanghai-air.com/news/saldt.asp?ttt="
    matchPostDate = re.compile(r"[\d]+-[\d]+-[\d]+")
    conn, cursor = mysqlDataBase_init()
    pq = PyQuery(newsPageUrl)
    i = 0
    while True:
        a = pq("a:eq(%s)" % i)
        i += 1
        newsUrl = a.attr("href")
        if not newsUrl:
            break
        # Only PublicInfo links are articles.
        if not newsUrl.find("PublicInfo") != -1:
            continue
        newsUrl = "http://ww1.shanghai-air.com" + newsUrl
        if checkNewsUrl(cursor, newsUrl):
            break
        title = a.text()[1:]
        postDate = parseDate(matchPostDate.search(a.text()).group())
        # Link text is "title (date)" -- cut the date suffix off.
        title = title[:-(len(postDate) + 2)]
        newspq = PyQuery(newsUrl)
        content = newspq("div.texttext").text()
        source = ""
        if not content:
            continue
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def zhongguohangkong():
    """Air China news crawler -- the site intermittently raises urlopen
    errors, so fetches are retried up to 10 times."""
    airport = "中国航空"
    newsPageUrl = "http://ffp.airchina.com.cn/cms/ffp/jszx/xwxx/default.html"
    conn, cursor = mysqlDataBase_init()
    count = 0
    while count != 10:
        try:
            pq = PyQuery(newsPageUrl)
        except urllib2.URLError:
            count += 1
            time.sleep(5)
        else:
            break
    i = 0
    while True:
        newsUrl = pq("a:eq(%s)" % i).attr("href")
        if not newsUrl:
            break
        span = pq("span:eq(%s)" % i).text()
        i += 1
        postDate = re.search(r"[\d]{4}-[\d]{2}-[\d]{2}", span).group()
        if postDate < earliestTime:
            break
        title = span[:-14]
        newsUrl = "http://ffp.airchina.com.cn" + newsUrl
        if checkNewsUrl(cursor, newsUrl):
            break
        # Retry the article fetch up to 10 times as well.
        count = 0
        while count != 10:
            try:
                newspq = PyQuery(newsUrl)
            except urllib2.URLError:
                count += 1
                time.sleep(5)
            else:
                break
        content = newspq("body").text()
        source = ""
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def sichuanhangkong():
    """Sichuan Airlines news crawler."""
    airport = "四川航空"
    newsPageUrl = "http://www.scal.com.cn/ScalB2CWeb/News/More_News.aspx?code=NC00391"
    pq = PyQuery(urllib.urlopen(newsPageUrl).read())
    ul = pq("ul.news_n")
    conn, cursor = mysqlDataBase_init()
    # Article id is the 4-digit number inside the onclick handler.
    match = re.compile(r"[\d]{4}")
    i = 0
    while True:
        a = pq("ul.news_n a:eq(%s)" % i)
        i += 1
        id = a.attr("onclick")
        if not id:
            break
        id = match.search(id).group()
        newsUrl = "http://www.scal.com.cn/Scal.WebMaster/FileUpLoad/htmlpage/%s.html"
        newsUrl = newsUrl % id
        if checkNewsUrl(cursor, newsUrl):
            break
        newspq = PyQuery(newsUrl)
        title = newspq("span.NewsTitle").text()
        postDate = newspq("span.NewsInfo_Time").text()
        postDate = parseDate(postDate)[5:]
        source = newspq("span.NewsInfo_Publisher").text()
        content = newspq("div.NewsContent").text()
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def tianjinhangkong():
    """Tianjin Airlines news crawler -- articles are served as MIME
    messages (base64-encoded HTML), so they are decoded before parsing."""
    airport = "天津航空"
    # News on the official Tianjin Airlines site.
    url = "http://www.tianjin-air.com/index/ann!doAjaxQuery.action"
    pq = PyQuery(urllib.urlopen(url).read())
    match = re.compile("'.*'")
    conn, cursor = mysqlDataBase_init()
    i = 0
    while True:
        a = pq("a:eq(%s)" % i)
        i += 1
        title = a.attr("title")
        newsUrl = a.attr("href")
        if not newsUrl:
            break
        newsUrl = match.search(newsUrl).group()[1:-1]
        newsUrl = "http://www.tianjin-air.com" + newsUrl
        if checkNewsUrl(cursor, newsUrl):
            break
        content = urllib.urlopen(newsUrl).read()
        # Post date comes from the MIME "Date:" header line.
        begin = content.find("Date:")
        postDate = content[begin:content.find("\n", begin)][6:-6].strip()
        try:
            postDate = time.strftime("%Y-%m-%d", time.strptime(postDate, "%a, %m %b %Y %H:%M:%S"))
        except:
            # Header did not parse -- fall back to a bare date in the text,
            # then to today's date.
            matchp = re.search(r"[\d]+-[\d]+-[\d]+", postDate)
            if matchp:
                postDate = matchp.group()
            else:
                postDate = time.strftime("%Y%m%d")
        content = content[content.find("\n", content.find("X-MimeOLE")):].strip()
        # Body is base64 -- decode before handing to PyQuery.
        content = base64.decodestring(content)
        try:
            newspq = PyQuery(content)
        except:
            continue
        content = newspq("body").text()
        if not content:
            continue
        source = ""
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def shandonghangkong():
    """Shandong Airlines crawler -- the news list is served by an AJAX
    portlet, so a hand-built POST request is replayed; a second phase
    crawls the homepage news box."""
    airport = "山东航空"
    # Captured request headers, one "Name:Value" per line.
    # NOTE(review): .split(":") below truncates values containing ':'
    # (e.g. the Cookie line) -- confirm the server tolerates this.
    headerString = """Accept:*/*
Accept-Charset:GBK,utf-8;q=0.7,*;q=0.3
Accept-Encoding:gzip,deflate,sdch
Accept-Language:zh-CN,zh;q=0.8
Connection:keep-alive
Content-Length:176
Content-Type:application/x-www-form-urlencoded
Cookie:JSESSIONID=0000Dva3t-EvL-J6jQ5uEa8YppU:-1; GUEST_LANGUAGE_ID=zh_CN; COOKIE_SUPPORT=true; __ozlvd671=1302764071; __ozlvd=1302764071
Host:www.shandongair.com
Method:POST /c/portal/render_portlet HTTP/1.1
Origin:http://www.shandongair.com
Referer:http://www.shandongair.com/web/shair_zh/news
User-Agent:Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16"""
    headerString = headerString.split("\n")
    # Captured POST form fields, one "key:value" per line.
    dataString = """p_l_id:89978
p_p_id:journal_news_INSTANCE_1h76
p_p_action:0
p_p_state:normal
p_p_mode:view
p_p_col_id:column-3
p_p_col_pos:2
p_p_col_count:3
currentURL:/web/shair_zh/news"""
    dataString = dataString.split("\n")
    dataDict = {}
    for line in dataString:
        line = line.split(":")
        dataDict[line[0]] = line[1]
    url = "http://www.shandongair.com/c/portal/render_portlet"
    request = urllib2.Request(url)
    for line in headerString:
        line = line.strip().split(":")
        request.add_header(line[0], line[1])
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
    page = 1
    while page != -1:
        # "cur" selects the listing page inside the portlet.
        dataDict["cur"] = "%s" % page
        data = urllib.urlencode(dataDict)
        page += 1
        sourceFile = opener.open(request, data).read()
        conn, cursor = mysqlDataBase_init()
        pq = PyQuery(unicode(sourceFile, "utf8"))
        i = 0
        while True:
            a = pq("table.taglib-search-iterator a:eq(%s)" % i)
            span = pq("table.taglib-search-iterator span:eq(%s)" % i)
            i += 1
            newsUrl = a.attr("href")
            if not newsUrl:
                break
            newsUrl = "http://www.shandongair.com" + newsUrl
            if checkNewsUrl(cursor, newsUrl):
                page = -1  # stop marker
                break
            title = a.text()
            postDate = span.text()
            if postDate < earliestTime:
                page = -1  # stop marker
                break
            sourceFile = urllib.urlopen(newsUrl).read()
            newspq = PyQuery(unicode(sourceFile, "utf8"))
            content = newspq("td.main").text()
            source = ""
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            pass
        conn.commit()
        conn.close()
        pass
    # Phase 2: homepage news box of the Shandong Airlines site.
    newsPageUrl = "http://www.shandongair.com/"
    conn, cursor = mysqlDataBase_init()
    pq = PyQuery(newsPageUrl)
    i = 0
    while True:
        a = pq("div.ind_news_box_center a:eq(%s)" % i)
        i += 1
        newsUrl = a.attr("href")
        if not newsUrl:
            break
        if checkNewsUrl(cursor, newsUrl):
            break
        sourceFile = urllib.urlopen(newsUrl).read()
        sourceFile = unicode(sourceFile, "utf-8")
        newspq = PyQuery(sourceFile)
        content = newspq("body table:eq(1)").text()[:-6]
        title = newspq("title").text()
        # Prefer a Chinese-format date found in the body; else use today.
        m = re.search(r"[\d]{4}年[\d]+月[\d]+日", content)
        if m:
            postDate = m.group()
            postDate = parseDate(postDate)
        else:
            postDate = time.strftime("%Y%m%d")
        source = ""
        executeSql(cursor, title, postDate, content, airport, source, newsUrl)
        pass
    conn.commit()
    conn.close()
    pass


def jixianghangkong():
    """Juneyao Airlines crawler -- no news section found, so promotion
    announcements are crawled instead."""
    airport = "吉祥航空"
    page = 1
    while page != -1:
        newsPageUrl = "http://www.juneyaoair.com/JY_Airlines_dtlist.jsp?page=%s&menu_no=203" % page
        page += 1
        try:
            pq = PyQuery(newsPageUrl)
        except:
            break
        conn, cursor = mysqlDataBase_init()
        table = pq("table:eq(18)")
        trindex = 1
        while True:
            tr = table("tr:eq(%s)" % trindex)
            trindex += 1
            if not tr:
                break
            a = tr("td a")
            newsUrl = "http://www.juneyaoair.com/" + a.attr("href")
            title = a.text()
            source = tr("td:eq(3)").text()
            postDate = tr("td:eq(4)").text()
            if not postDate:
                break
            if postDate < earliestTime:
                page = -1  # stop marker
                break
            if checkNewsUrl(cursor, newsUrl):
                page = -1  # stop marker
                break
            newspq = PyQuery(newsUrl)
            content = newspq("table:eq(6)").text()
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            pass
        conn.commit()
        conn.close()
        pass
    pass


def nanfanghangkong():
    """China Southern Airlines crawler -- still has some known issues.

    NOTE(review): the early returns below exit without conn.commit(), so
    rows inserted in that run may be lost depending on autocommit -- verify.
    """
    airport = "南方航空"
    year = int(time.strftime("%Y"))
    match = re.compile(r"[\d]{4}-[\d]{2}-[\d]{2}")
    conn, cursor = mysqlDataBase_init()
    # Walk one year-listing page per iteration, newest year first.
    while str(year) >= earliestTime[:4]:
        newsBasePage = "http://www.csair.com/cn/aboutcsn/04/newsCompany/%s/list_news_%s.asp" % (year, year)
        sourceFile = urllib.urlopen(newsBasePage).read()
        pq = PyQuery(unicode(sourceFile, "utf8"))
        liindex = 0
        while True:
            li = pq("li:eq(%s)" % liindex)
            liindex += 1
            a = li("a")
            newsUrl = a.attr("href")
            if not newsUrl:
                break
            newsUrl = "http://www.csair.com" + newsUrl
            postDate = match.search(li.text()).group()
            if postDate < earliestTime:
                return
            if checkNewsUrl(cursor, newsUrl):
                return
            newspq = PyQuery(newsUrl)
            content = newspq("#content").text()
            title = content.split()[2]
            source = ""
            # Strip Dreamweaver template markers.
            if content.find("InstanceBeginEditable") != -1:
                content = content[len("InstanceBeginEditable name=\"con\" "):]
            if content.find("InstanceEndEditable") != -1:
                content = content[:-len("InstanceEndEditable")]
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            pass
        year -= 1
        pass
    conn.commit()
    conn.close()
    pass


def chunqiuhangkongwang():
    """Spring Airlines news portal (info.china-sss.com) crawler."""
    airport = "春秋航空"
    source = "春秋航空网"
    newsBasePageUrl = "http://info.china-sss.com/article_ps.do?xsl=topic_first_ilist_e&pt=1&ps=30&pi=%s&sf=seq_num&sfdt=number&st=ascending&cid=8069761218&bid=1&qtopic=3&qtit=&qct1=&qct2=#"
    conn, cursor = mysqlDataBase_init()
    # Matches the "来源:... [" snippet that names the article's source.
    sourceMatch = re.compile(r"来源:.*\[")
    page = 1
    while page != -1:
        newsPageUrl = newsBasePageUrl % page
        page += 1
        sourceFile = urllib.urlopen(newsPageUrl).read()
        pq = PyQuery(unicode(sourceFile, "gbk"))
        i = 0
        while True:
            a = pq("a:eq(%s)" % i)
            tr = pq("td#container1 tr:eq(%s)" % i)
            i += 1
            newsUrl = a.attr("href")
            if not newsUrl:
                break
            # Single-character hrefs are pager anchors, not articles.
            if len(newsUrl) == 1:
                continue
            title = a.text()
            newsUrl = "http://info.china-sss.com" + newsUrl
            postDate = "20" + tr("td:eq(2)").text()
            if postDate < earliestTime:
                page = -1
                break
            if checkNewsUrl(cursor, newsUrl):
                page = -1
                break
            sourceFile = urllib.urlopen(newsUrl).read()
            newspq = PyQuery(unicode(sourceFile, "gbk"))
            content = newspq("td.fontmain1").text()
            if content.find("function showContent()") != -1:
                content = content[:content.find("function showContent()")]
            # Prefer the source named inside the article, if any.
            sourceInContent = sourceMatch.search(content)
            if sourceInContent:
                source = sourceInContent.group()[len("来源:"):-1]
                if source.find("】") != -1:
                    source = source[:-len("】")]
            content = content.replace("[/链接]", "")
            if content.find("[链接 地址=") != -1:
                content = content[:-content.find("[链接 地址=")]
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            # Restore the default source for the next article.
            source = "春秋航空网"
            pass
        pass
    conn.commit()
    conn.close()
    pass


def minhangziyuanwang():
    """CARNOC (civil-aviation news portal) crawler -- searches the portal
    for each airline name and stores matching articles."""
    source = "民航资源网"
    matchPostDate = re.compile(r"[\d]+-[\d]+-[\d]+")
    conn, cursor = mysqlDataBase_init()
    au = ["天津航空", "吉祥航空", "厦门航空", "四川航空", "中国航空"]
    for airport in au:
        url = "http://news.carnoc.com/search.jsp?key=%s&querytype=0&page=1"
        # The search endpoint expects GB2312-encoded query strings.
        url = url % (urllib.quote(airport.decode("utf8").encode("gb2312")))
        sourceFile = urllib.urlopen(url).read()
        pq = PyQuery(sourceFile)
        index = 0
        while True:
            li = pq("div.text li:eq(%s)" % index)
            index += 1
            if not li:
                break
            newsUrl = li("a").attr("href")
            title = li("a").text()
            match = matchPostDate.search(li.text())
            if not match:
                continue
            postDate = match.group()
            if newsUrl.find("2000") != -1:
                continue
            if checkNewsUrl(cursor, newsUrl):
                break
            sourceFile = urllib.urlopen(newsUrl).read()
            newspq = PyQuery(sourceFile)
            content = newspq("div#newstext").text()
            if not content:
                continue
            # Skip stubs shorter than 100 characters.
            if len(content) < 100:
                continue
            executeSql(cursor, title, postDate, content, airport, source, newsUrl)
            pass
        # Restore the default.
        source = ""
    conn.commit()
    conn.close()
    pass


def crawl_news():
    """Run every crawler ("*hangkong*" functions plus the CARNOC portal)
    in its own thread and wait for all of them."""
    startTime = time.time()
    # Used by the final version: discover crawler functions on the module.
    import jipiao
    threads = []
    # Add the threads.
    threads.append(threading.Thread(target=minhangziyuanwang))
    for f in dir(jipiao):
        if f.find("hangkong") != -1:
            exec("threads.append(threading.Thread(target=%s))" % f)
    # Start the threads.
    for t in threads:
        t.start()
    # Wait for the threads to finish.
    for t in threads:
        t.join()
    printf((time.time() - startTime))
    pass


if __name__ == "__main__":
    startTime = time.time()
    crawl_news()
    # Write the run log.
    if not os.path.exists("logs"):
        os.mkdir("logs")
    logs = file("./logs/logs.txt", "a")
    now = time.strftime("%Y-%m-%d %H:%M:%S")
    logs.write("%s\t耗时:%sms\n" % (now, time.time() - startTime))
    logs.close()
    printf(time.time() - startTime)
    # Individual crawlers, for manual runs:
    # chongqinghangkong()
    # chunqiuhangkong()
    # chunqiuhangkongwang()#春秋航空网
    # dongfanghangkong()
    # hannanhangkong()
    # jixianghangkong()
    # nanfanghangkong()
    # shandonghangkong()#ajax不易处理,使用post
    # shanghaihangkong()
    # shanghaihangkong_old()#上海航空旧版
    # shenzhenhangkong()
    # sichuanhangkong()
    # tianjinhangkong()
    # xiamenhangkong()
    # xingfuhangkong()
    # zhongguohangkong()
    # minhangziyuanwang()
# 创建数据的SQL -- SQL used to create the data table (kept as a comment so the file remains valid Python):
#
# CREATE TABLE IF NOT EXISTS `jipiaonews` (
#   `id` int(11) NOT NULL AUTO_INCREMENT,
#   `title` text NOT NULL,
#   `postDate` datetime NOT NULL,
#   `content` text NOT NULL,
#   `airport` varchar(255) NOT NULL,
#   `source` varchar(255) NOT NULL,
#   `newsUrl` varchar(255) NOT NULL DEFAULT '',
#   PRIMARY KEY (`id`)
# ) ENGINE=MyISAM DEFAULT CHARSET=utf8;