自动下载google reader里面的星标文章

1. google reader马上就要关闭了,最后一次看看俺的浏览记录吧

2. 对于以前订阅的资料和星标文章,可以通过takeout功能把链接下载下来,加到别的rss阅读器里面(俺用的是greatnews客户端阅读器)

对于星标的文章,俺自己写了个 py script,把链接指向的内容也一起下载下来

自动下载google reader里面的星标文章
#E:\soft\Python27\down_googlereader_starred.py



import urllib,os,sys

import json,string



def getUrlContent(url):
    """Fetch ``url`` and return the raw response body.

    Uses ``urllib.urlopen``, so this runs on Python 2 only.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` on the opened response returns (the whole body).

    Raises:
        Any error raised while opening or reading the URL is propagated
        unchanged to the caller.
    """
    fp = urllib.urlopen(url)
    try:
        return fp.read()
    finally:
        # Release the connection even when read() fails part-way through.
        fp.close()

    

def ToFname(title):
    """Return ``title`` with characters unsuitable for a file name removed.

    Line breaks, tabs and the punctuation listed in ``rep_chars`` are
    dropped; every other character (including non-ASCII text) is kept as is.
    The list covers the characters Windows rejects in file names
    (``\\ / : * ? " < > |``) plus a few shell-sensitive ones.

    Args:
        title: Article title to turn into a file-name fragment.

    Returns:
        The title with every character from ``rep_chars`` removed.
    """
    # '*' and '"' are rejected by Windows just like the other separators,
    # and '\r' / '\t' can appear in titles alongside '\n'.
    rep_chars = '\n\r\t;:%&^|\\/?<>*"'
    s = title
    for ch in rep_chars:
        s = s.replace(ch, '')
    return s



def GetCfg_latest_id():
    """Return the resume index stored in ``cfg.txt``.

    The first line of ``cfg.txt`` (looked up in the current working
    directory) is parsed as an integer.

    Returns:
        The stored integer, or 0 when the file does not exist or its first
        line is not a valid integer.
    """
    fname = 'cfg.txt'
    if not os.path.isfile(fname):
        return 0

    with open(fname, 'r') as fp:
        first_line = fp.readline().strip()

    try:
        return int(first_line)
    except ValueError:
        # An empty or damaged file (for example after an interrupted write)
        # must not block every later run; start again from the beginning.
        return 0

    

def logFile(fname, cont):
    """Write ``cont`` to ``fname``, replacing any existing content.

    Despite the name this does not append: the file is opened in ``'w'``
    mode, so it is truncated on every call.

    Args:
        fname: Path of the file to (over)write.
        cont: Text to store in the file.
    """
    # The context manager closes the handle even if write() raises.
    with open(fname, 'w') as fp:
        fp.write(cont)

    

def SetCfg_latest_id(id):
    """Store ``id`` in ``cfg.txt`` as the index to resume from.

    Args:
        id: Integer index; written in decimal form via ``logFile``,
            overwriting the previous value. The name shadows the builtin
            but is kept because it is the function's public parameter.
    """
    text = '%d' % (id)
    logFile('cfg.txt', text)

    

# Driver (Python 2.7 script): walk the starred items of the exported
# starred.json and save the page each item links to into the current
# directory. The index of the next item is written to cfg.txt after every
# item, so an interrupted run resumes where it stopped.
with open(r'F:\sw\<foldername>-takeout\Reader\starred.json', 'r') as fp:
    cont = fp.read()
li = json.loads(cont)

latest_id = GetCfg_latest_id()
for item in li['items'][latest_id:]:
    url = item['alternate'][0]['href']
    title = item['title']
    print(title)
    fname = '%.3d.%s.htm' % (latest_id, ToFname(title))
    print(fname)
    try:
        urlcont = getUrlContent(url)
        # Binary mode: the body is raw bytes from the server, and text mode
        # on Windows would rewrite every '\n' in it as '\r\n'.
        with open(fname, 'wb') as out:
            out.write(urlcont)
    except Exception as e:
        # Best effort: record the failure in a placeholder page and move on
        # to the next item instead of aborting the whole run.
        print(e)
        urlcont = '%s\n<br>\n%s<br>\n%s' % (url, str(e), fname)
        # 'replace' keeps a title with characters outside GB2312 from
        # raising inside this handler and killing the script.
        urlcont = urlcont.encode('gb2312', 'replace')
        fname = '%.3d.Except.htm' % (latest_id)
        logFile(fname, urlcont)

    # Advance and persist progress whether or not the download succeeded.
    latest_id = latest_id + 1
    SetCfg_latest_id(latest_id)
View Code

 

你可能感兴趣的:(Google)