#  -*- coding: utf-8 -*-



from HttpRequestModule  import * 

import os
import json
import traceback

import codecs
from lxml  import etree
import StringIO, gzip 
import sys
# Force the interpreter-wide default string encoding to utf-8.
# NOTE(review): this looks like the Python 2-only idiom where `sys` is
# reloaded so that `setdefaultencoding` is available again -- confirm the
# target interpreter, since these two calls are not expected to work on Python 3.
reload(sys)
sys.setdefaultencoding( ' utf-8 ')

def write_file(file_name, file_data, encoding, file_dir=r"D:\fs\test_data\qqzone"):
    """Write ``file_data`` to ``file_name`` inside ``file_dir``.

    Nothing is written when ``file_data`` is empty (or ``None``); a notice is
    printed instead and the function returns.

    Args:
        file_name: Name of the file to create, joined onto ``file_dir``.
        file_data: Text to write.
        encoding: Codec name handed to ``codecs.open`` for encoding the text.
        file_dir: Directory that receives the file. Defaults to the path that
            was previously hard-coded, so three-argument callers are
            unaffected.

    Returns:
        None.
    """
    if not file_data:
        print(" file_data is zero ")
        return
    file_path = os.path.join(file_dir, file_name)
    print(file_path)
    # The file is written exactly once, through codecs.open, so the text is
    # encoded with the requested codec; the context manager closes the handle
    # even if the write raises.
    with codecs.open(file_path, "w", encoding) as f:
        f.write(file_data)


def decodeJson(json_string):
    """Parse ``json_string`` and return the decoded object.

    Args:
        json_string: JSON document to decode.

    Returns:
        The decoded Python object, or ``None`` when decoding fails. Failures
        are reported on stdout rather than raised.
    """
    try:
        return json.loads(json_string)
    except (TypeError, ValueError) as err:
        # Wrong input type or malformed JSON: report it briefly.
        print(' TypeError or ValueError:{0} '.format(err))
    except Exception:
        # Anything unexpected: keep the best-effort contract but show the trace.
        print(traceback.format_exc())
    return None



def getUserBlogList():
    """Fetch the blog list for the hard-coded Qzone account.

    The request goes through ``doGet`` and the payload is parsed with
    ``decodeJson``.

    Returns:
        The ``list`` entry of the response's ``data`` object, or an empty
        list when the response is empty, cannot be parsed, carries a non-zero
        ``code``, or reports a ``totalNum`` that is not positive.
    """
    # Single-line literal: the URL no longer carries the newlines and
    # indentation that the triple-quoted form embedded in it.
    diary_url = "http://b1.qzone.qq.com/cgi-bin/blognew/get_abs?hostUin=859226880&blogType=0&cateName=&cateHex=&statYear=2015&reqInfo=7&pos=0&num=15&sortType=0&absType=0&source=0&rand=0.6346770680975169&ref=qzone&g_tk=1611717761&verbose=1"
    data = doGet(diary_url)
    if not data:
        print(" data len is 0 ")
        return []
    # Drop the first 10 and the last 2 characters before parsing.
    # NOTE(review): presumably this removes a JSONP wrapper around the JSON
    # body -- confirm against a real response.
    data_json = data[10:len(data) - 2]
    decode_json = decodeJson(data_json.decode("gbk"))
    if decode_json is None:
        print(" decode_json is None ")
        return []
    if decode_json['code'] != 0:
        # format() instead of "+": the code is compared with an int above, so
        # string concatenation would raise TypeError.
        print(" server response code is  {0}".format(decode_json['code']))
        return []
    data = decode_json['data']
    if data['totalNum'] <= 0:
        print(" server response totalnum is  {0}".format(data['totalNum']))
        return []
    return data['list']

def getUserBlog(uin, blogid):
    """Download the raw page of one blog post.

    Args:
        uin: Account number substituted into the ``uin`` query parameter.
        blogid: Blog identifier substituted into the ``blogid`` query parameter.

    Returns:
        The response body exactly as read from the connection, or ``""`` when
        the request fails with ``URLError``. The request advertises
        ``gzip,deflate,sdch`` and nothing here decompresses the body, so
        callers must be prepared for compressed bytes.
    """
    url = (
        "http://b1.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin=%(uin)s&blogid=%(blogid)s&styledm=ctc.qzonestyle.gtimg.cn&imgdm=ctc.qzs.qq.com&bdm=b.qzone.qq.com&mode=2&numperpage=15&timestamp=1437033537&dprefix=&inCharset=gb2312&outCharset=gb2312&ref=qzone"
        % {'uin': uin, 'blogid': blogid}
    )
    my_headers = {
        "Accept-Encoding": "gzip,deflate,sdch",
        "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Referer": "http://ctc.qzs.qq.com/qzone/newblog/blogcanvas.html",
    }
    request = urllib2.Request(url, headers=my_headers)
    try:
        response = urllib2.urlopen(request)
    except URLError as e:
        # Errors carrying a status code are reported separately from
        # connection-level failures that only carry a reason.
        if hasattr(e, 'code'):
            print(" The server couldn't fulfill the request. errorcode:{0} ".format(e.code))
        elif hasattr(e, 'reason'):
            print(' We failed to reach a server. reason:{0} '.format(e.reason))
        return ""
    try:
        return response.read()
    finally:
        # Release the connection whether or not the read succeeded.
        response.close()

def getText(elem):
    """Return all text found under ``elem`` as one string.

    Every fragment yielded by ``elem.itertext()`` is stripped of surrounding
    whitespace and the fragments are joined with no separator.

    Args:
        elem: Any object exposing ``itertext()`` that yields strings.

    Returns:
        The concatenated, stripped text.
    """
    return ''.join(fragment.strip() for fragment in elem.itertext())

def gzdecode(data):
    """Return the gzip-decompressed form of ``data``.

    Args:
        data: Byte string, expected to hold a gzip stream.

    Returns:
        The decompressed bytes. Input that is empty or does not start with
        the gzip magic number is returned unchanged, so an uncompressed or
        failed download does not raise here.
    """
    # Local import keeps this block self-contained; BytesIO is the in-memory
    # binary buffer available on both Python 2.6+ and Python 3.
    import io

    if not data or data[:2] != b"\x1f\x8b":
        return data
    # Read the decompressed data; the context manager closes the GzipFile.
    with gzip.GzipFile(fileobj=io.BytesIO(data)) as gziper:
        return gziper.read()
    
def _save_blog_text(blogid, blog_data, source_encoding, target_encoding, banner):
    """Decode the page, extract the blog body text, print it and save it as ``<blogid>.txt``."""
    content = blog_data.decode(source_encoding)
    tree = etree.HTML(content)
    # [0] raises IndexError when the detail div is missing; the caller treats
    # that like any other failed attempt.
    node = tree.xpath(" //div[@id='blogDetailDiv'] ")[0]
    tgt_data = getText(node)
    print(banner * 30)
    print(tgt_data)
    write_file(blogid + '.txt', tgt_data, target_encoding)


def test(blogid):
    """Download one blog post, extract its text and store it in a ``.txt`` file.

    The page is fetched with ``getUserBlog``, passed through ``gzdecode`` and
    then decoded as utf-8; if any step of that attempt fails, the same
    pipeline is retried with gbk. Failures are printed, never raised.

    Args:
        blogid: Blog identifier as a string; also used as the output file stem.

    Returns:
        None.
    """
    print(blogid)
    blog_data = getUserBlog('859226880', blogid)
    blog_data = gzdecode(blog_data)
    # (decode-with, write-with, banner, failure tag) for each attempt.
    # NOTE(review): text decoded as utf-8 is written as gbk and vice versa,
    # exactly as before -- confirm this pairing is intentional.
    attempts = (
        ('utf-8', 'gbk', " * ", " 111 "),
        ('gbk', 'utf-8', " _ ", " 222 "),
    )
    for source_encoding, target_encoding, banner, tag in attempts:
        try:
            _save_blog_text(blogid, blog_data, source_encoding, target_encoding, banner)
            return
        except Exception as ex:
            # Best-effort scraper: report the failure and try the next encoding.
            print("{0} {1} {2} {3}".format(tag, Exception, " : ", ex))
    
 
def main():
    """Process one fixed blog post, then every post returned by ``getUserBlogList``."""
    print(" main ")
    test("1288281044")
    for blog_item in getUserBlogList():
        blog_id = blog_item['blogId']
        print(blog_id)
        # test() concatenates the id with a file suffix, so pass it as a string.
        test(str(blog_id))


# Run only when executed as a script, so importing this module does not
# trigger network requests.
if __name__ == "__main__":
    main()