关于网站

获取网站相关信息(响应头、Server 类型、状态码、最终 URL)

import dns.resolver
import urllib2
# Fetch the page once and print the basic facts exposed by the response object.
# The timeout keeps the script from hanging forever on an unresponsive host.
resp = urllib2.urlopen('http://www.www.com', timeout=10)
try:
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(type(resp))
    print(resp.headers)       # all response headers
    # Not every server sends a Server header; .get() prints None instead of
    # raising KeyError.
    print(resp.headers.get('Server'))
    print(resp.getcode())     # HTTP status code
    print(resp.geturl())      # URL of the resource actually retrieved
finally:
    # Release the underlying connection even if one of the prints fails.
    resp.close()

获取网页 title 和编码,并保存到 MongoDB

import mongo
import header
import pymongo
import random
import requests
import urllib2
from bs4 import BeautifulSoup


#----------------------------------------------------------------------
def url_info(url):
    """Fetch *url* and store its page title and encoding in MongoDB.

    The page is downloaded with ``requests`` using the headers returned by
    ``header.get_header()`` and a random timeout between 5 and 10 seconds,
    then parsed with BeautifulSoup. The title and the encoding reported by
    ``requests`` are upserted into ``mongo.ls_Info`` under the filter
    ``{"URL": url}``.

    Errors raised while fetching or parsing the page propagate to the
    caller. Errors raised while reading the title (for example a page
    without a ``<title>`` tag) or while writing to MongoDB are printed and
    swallowed.

    :param url: address of the page to inspect.
    """
    # Send the project headers with the request that is actually made;
    # wrapping them in a separate urllib2.Request object never applied them.
    response = requests.get(url,
                            headers=header.get_header(),
                            timeout=random.randint(5, 10))
    soup = BeautifulSoup(response.content, 'html.parser')

    try:
        # soup.title is None when the page has no <title>; the resulting
        # AttributeError is reported by the handler below.
        title_url = soup.title.string
        print(title_url)

        mongo.ls_Info.update({"URL": url},
                             {"$set": {'title': title_url,
                                       'encoding': response.encoding}},
                             upsert=True)
    except Exception as e:
        # Best effort: report the problem and return normally.
        print(str(e))

你可能感兴趣的:(关于网站)