作为刚入门的pythoner,经常查阅文档是必须的,可是python的英文文档的确让英语比较差的同学非常蛋疼。
稍微入门之后,正想找一个练手的项目,偶然想到何不写一个阅读python文档的工具呢?大概思路就是详细列出一个module的各项属性,并且自动翻译__doc__文档。
具体做法是:首先判断传入的是module还是它的属性,然后获取其相关信息;如果是__doc__,就通过网络调用谷歌翻译,取得翻译后的中文文字,再以一定的方式把中英文组合显示。
---------------------------------------下面贴代码------------------------------------------------------------------
get_object_info.py
#!/usr/bin/python
#coding=utf-8
#Python -V:Python 2.6
#filename:get_object_info.py
"""Show the attributes of a module, or of one member of a module, together
with a line-by-line Chinese translation of its ``__doc__``.

Usage::

    python get_object_info.py <module>
    python get_object_info.py <module>.<attribute>
"""
__author__ = 'Tattoo'
__date__ = '2013.07.09'

import os
import sys

import en_2_zh

SPLITE_LENGTH = 60
SPLITE_CHAR = '*'

# NOTE: every ``print(...)`` below takes exactly one argument on purpose.
# That form produces the same output whether ``print`` is the Python 2
# statement or the Python 3 function.


def _my_print(title, text, is_none_prompt=False):
    """Print *text* underneath a banner line that carries *title*.

    Args:
        title: Caption placed in the middle of the banner.
        text: Value to print. Values that are not strings (for example the
            class returned by ``__base__``) are converted with ``str``.
        is_none_prompt: When false (the default) nothing at all is printed
            if *text* is ``None`` or blank.
    """
    if text is not None and not hasattr(text, 'strip'):
        text = str(text)
    if not is_none_prompt and (text is None or text.strip() == ''):
        return
    # Floor division keeps the repeat count an integer; a negative count
    # (title longer than the banner) simply yields no padding.
    half_width = (SPLITE_LENGTH - len(title)) // 2
    padding = SPLITE_CHAR * half_width
    print(os.linesep + padding + title + padding)
    print(text)


def _safe_getattr(obj, name):
    """Return ``getattr(obj, name)``, or ``''`` when the lookup fails."""
    try:
        return getattr(obj, name)
    except Exception:
        # Best effort: a missing attribute must not abort the report.
        return ''


def _get_doc(cls):
    """Return ``cls.__doc__`` or ``''`` if it cannot be read."""
    return _safe_getattr(cls, '__doc__')


def _get_name(cls):
    """Return ``cls.__name__`` or ``''`` if it cannot be read."""
    return _safe_getattr(cls, '__name__')


def _get_dict(cls):
    """Return ``str(cls.__dict__)`` or ``''`` if it cannot be read."""
    try:
        return str(cls.__dict__)
    except Exception:
        return ''


def _get_module(cls):
    """Return ``cls.__module__`` or ``''`` if it cannot be read."""
    return _safe_getattr(cls, '__module__')


def _get_base(cls):
    """Return ``cls.__base__`` or ``''`` if it cannot be read."""
    return _safe_getattr(cls, '__base__')


def _get_file(cls):
    """Return ``cls.__file__`` or ``''`` if it cannot be read."""
    return _safe_getattr(cls, '__file__')


def _do_import_module(cls_str):
    """Import the module named *cls_str* and return it, or ``None``.

    ``__import__('a.b')`` returns the top-level package ``a``, so the
    requested module is fetched from ``sys.modules`` after the import.
    """
    try:
        __import__(cls_str)
    except (ImportError, ValueError):
        # ValueError is raised for an empty module name.
        return None
    return sys.modules.get(cls_str)


def _do_import_attr(cls_str, attr_str):
    """Import module *cls_str* on behalf of its member *attr_str*.

    Returns the module object (not the member), or ``None`` when the import
    fails for any reason.
    """
    try:
        # A non-empty fromlist makes __import__ return the named module
        # itself rather than its top-level package.
        return __import__(cls_str, globals(), locals(), [attr_str])
    except Exception:
        return None


def _translate_doc(doc):
    """Return *doc* mixed with its translation, or *doc* itself on failure."""
    if not doc:
        return doc
    try:
        return en_2_zh.get_translate_mix_text(doc)
    except Exception as exc:
        # The translation needs the network; fall back to the original text
        # so the rest of the report is still shown.
        sys.stderr.write('translation failed (%s); showing the original '
                         'text%s' % (exc, os.linesep))
        return doc


def _print_details(obj):
    """Print dir(), translated doc and the common dunder attributes of *obj*."""
    _my_print("模块名称列表dir(" + str(obj) + "):", ', '.join(dir(obj)))
    _my_print("文档__doc__翻译:", _translate_doc(_get_doc(obj)))
    _my_print("名称__name__:", _get_name(obj))
    _my_print("字典属性:__dict__:", _get_dict(obj))
    _my_print("模块__module__:", _get_module(obj))
    _my_print("__base__:", _get_base(obj))
    _my_print("__file__:", _get_file(obj))


def _print_cls_info(cls):
    """Print the report for a module object."""
    print('这是一个模块')
    _print_details(cls)


def _print_attr_info(attr):
    """Print the report for a member of a module."""
    print(attr)
    print(dir(attr))
    print('这是一个属性')
    _print_details(attr)


def get_class_info(cls):
    """Print information about the module or module member named *cls*.

    Args:
        cls: Dotted name given as a string. It is first tried as a module
            name; if that import fails, the part after the last dot is
            looked up as an attribute of the module named by the rest.
    """
    _my_print('cls = ' + cls, cls)
    module = _do_import_module(cls)
    if module is not None:
        _print_cls_info(module)
        return

    # rpartition yields an empty module name when there is no dot at all,
    # which is reported as "cannot import" instead of raising ValueError.
    module_name, _, attr_name = cls.rpartition('.')
    owner = _do_import_attr(module_name, attr_name) if module_name else None
    print('')
    if owner is None or not hasattr(owner, attr_name):
        print('无法导入此模块')
        return
    print(str(owner))
    _print_attr_info(getattr(owner, attr_name))


def test_doc():
    """Manual check: translate ``sys.__doc__`` and print the mixed text."""
    print(en_2_zh.get_translate_mix_text(_get_doc(sys)))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <module>[.<attribute>]%s'
                         % (sys.argv[0], os.linesep))
        sys.exit(1)
    get_class_info(sys.argv[1])
#!/usr/bin/python
# -*- coding: utf-8 -*-
#Python -V: Python 2.6.6
#filename:GoogleTranslation1.2.py
# NOTE: get_object_info.py imports this module as ``en_2_zh``, so it has to
# be saved as en_2_zh.py to be found.
"""Translate English text to Simplified Chinese through the Google Translate
web page and interleave the result with the original, line by line.

Python 2 only: it relies on ``urllib2``.
"""
__author__ = "Yinlong Zhao (zhaoyl[at]sjtu[dot]edu[dot]cn)"
__date__ = "$Date: 2013/04/21 $"

import os
import random
import re
import urllib
import urllib2

import tag

_TRANSLATE_URL = 'http://translate.google.cn/translate_t'
# Pretend to be a browser when talking to the translation page.
_USER_AGENT = ('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; '
               '.NET CLR 2.0.50727)')
# The translated text is whatever follows "TRANSLATED_TEXT=" in the page,
# up to ";INPUT_TOOL_PATH" (non-greedy).
_RESULT_PATTERN = re.compile(r"(?<=TRANSLATED_TEXT=)(.*?);INPUT_TOOL_PATH")
# Column at which the translation starts on each mixed line.
_MIX_COLUMN = 80
_SAMPLE_TEXT = 'This module provides access to some objects.\nDynamic objects:'


def translate(text):
    """Send *text* to the translation page and return the translated text.

    Raises:
        ValueError: If the returned page does not contain a translation.
    """
    return _get_translate_text(_get_html(text))


def _get_html(text):
    """POST *text* (English) to the translation page and return the page body."""
    # 'langpair' asks for English -> Simplified Chinese.
    values = {'hl': 'zh-CN', 'ie': 'UTF-8', 'text': text,
              'langpair': "'en'|'zh-CN'"}
    request = urllib2.Request(_TRANSLATE_URL, urllib.urlencode(values))
    request.add_header('User-Agent', _USER_AGENT)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


def _get_translate_text(html):
    """Extract the translated text from the page body *html*.

    Raises:
        ValueError: If the ``TRANSLATED_TEXT=`` marker is not present.
    """
    match = _RESULT_PATTERN.search(html)
    if match is None:
        raise ValueError('TRANSLATED_TEXT marker not found in the response '
                         'page')
    text = match.group(1)
    # The captured value is a quoted script string; drop the surrounding
    # quotes so they do not show up as part of the translation.
    if len(text) >= 2 and text[0] == text[-1] and text[0] in '\'"':
        text = text[1:-1]
    return text


def _get_source_and_translate_mix_text(source_text, translate_text):
    """Pair every source line with the translated line at the same position.

    Each output line is the source line padded to 80 columns followed by the
    translation. When the two texts have a different number of lines, the
    shorter one is padded with empty lines so that nothing is dropped.

    Returns:
        The mixed text, or ``None`` when *source_text* is ``None``.
    """
    if source_text is None:
        return None
    source_lines = source_text.splitlines()
    translate_lines = (translate_text or '').splitlines()
    total = max(len(source_lines), len(translate_lines))
    source_lines += [''] * (total - len(source_lines))
    translate_lines += [''] * (total - len(translate_lines))
    return ''.join(
        os.linesep + source.ljust(_MIX_COLUMN) + '译:' + translated
        for source, translated in zip(source_lines, translate_lines))


def _translate_lines(orignal_text):
    """Translate *orignal_text* while keeping its line breaks.

    Line breaks are replaced by a tag before the request and restored in
    the answer. Returns ``(tagged_text, translated_text)``.
    """
    tagged_text = tag.add_tag(orignal_text)
    translated = tag.remove_tag(_get_translate_text(_get_html(tagged_text)))
    return tagged_text, translated


def test_and_save(orignal_text):
    """Translate *orignal_text*, log every step to ``test_result.txt`` and
    return the mixed text.

    Returns ``None`` without touching the file when *orignal_text* is
    ``None`` or empty.
    """
    if not orignal_text:
        return None
    add_tag_text = tag.add_tag(orignal_text)
    # Single-argument print: same output under Python 2 and Python 3.
    print('添加tag的文本:' + ' ' + add_tag_text)
    filename = 'test_result.txt'
    with open(filename, 'w') as fp:
        fp.write('原先的文本:' + orignal_text)
        fp.write('\n' + '$' * 50 + '\n')
        text_remove_tag = tag.remove_tag(
            _get_translate_text(_get_html(add_tag_text)))
        fp.write('提取出来的翻译结果为:')
        fp.write(text_remove_tag)
        # Pair against the untouched original: a tag round trip would also
        # split lines at underscores that were already in the text.
        text_mix = _get_source_and_translate_mix_text(orignal_text,
                                                      text_remove_tag)
        fp.write('整理后的结果:' + text_mix)
    return text_mix


def get_translate_mix_text(orignal_text):
    """Return *orignal_text* interleaved line by line with its translation.

    Returns ``None`` for ``None``. Blank input is returned in mixed form
    without contacting the translation service.

    Raises:
        ValueError: If the returned page does not contain a translation.
    """
    if orignal_text is None:
        return None
    if not orignal_text.strip():
        return _get_source_and_translate_mix_text(orignal_text, '')
    _, text_remove_tag = _translate_lines(orignal_text)
    return _get_source_and_translate_mix_text(orignal_text, text_remove_tag)


def test(text=None):
    """Translate *text* (a short built-in sample by default) and return the
    mixed text."""
    if text is None:
        text = _SAMPLE_TEXT
    return get_translate_mix_text(text)


if __name__ == "__main__":
    print(test())
tag.py
#!/usr/bin/python
#coding=utf-8
"""Replace line breaks by a marker before translation and restore them
afterwards, so the line structure of a text survives the translator."""
import sys
import random
import os
import re


def _random_str(randomlength=8):
    """Return a random string of *randomlength* ASCII letters and digits."""
    chars = 'AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz0123456789'
    return ''.join(random.choice(chars) for _ in range(randomlength))


# Letters in the marker may change the meaning of the translation, and other
# characters get translated into their Chinese counterparts, so a run of
# underscores is used (instead of e.g. ' splite_tag_%s ' % _random_str()).
# Known limitation: text that already has underscores next to a line break
# cannot be split back at exactly the original position.
SPLITE_TAG_TEXT = '________'


def add_tag(text):
    """Return *text* with every line break replaced by ``SPLITE_TAG_TEXT``.

    ``\\r\\n``, ``\\r`` and ``\\n`` are all treated as line breaks, so the
    result does not depend on the platform's ``os.linesep``. ``None`` is
    passed through unchanged.
    """
    if text is None:
        return None
    normalized = text.replace('\r\n', '\n').replace('\r', '\n')
    return normalized.replace('\n', SPLITE_TAG_TEXT)


def remove_tag(text, replacement=os.linesep):
    """Return *text* with every ``SPLITE_TAG_TEXT`` replaced by *replacement*.

    *replacement* is inserted literally; backslashes in it have no special
    meaning. ``None`` is passed through unchanged.
    """
    if text is None:
        return None
    return text.replace(SPLITE_TAG_TEXT, replacement)


def test():
    """Print ``sys.__doc__`` raw, tagged, and with the tags removed again."""
    # Single-argument print: same output under Python 2 and Python 3.
    original = sys.__doc__
    print(original)
    print('*' * 50)
    tagged = add_tag(original)
    print(tagged)
    print(remove_tag(tagged))


if __name__ == '__main__':
    test()
然后命令行直接执行$PY_HELP sys
打印如下:
/python_study$ $PY_HELP sys *************************cls = sys************************* sys 这是一个模块 *****模块名称列表dir(<module 'sys' (built-in)>):***** __displayhook__, __doc__, __excepthook__, __name__, __package__, __stderr__, __stdin__, __stdout__, _clear_type_cache, _current_frames, _getframe, api_version, argv, builtin_module_names, byteorder, call_tracing, callstats, copyright, displayhook, dont_write_bytecode, exc_clear, exc_info, exc_type, excepthook, exec_prefix, executable, exit, flags, float_info, getcheckinterval, getdefaultencoding, getdlopenflags, getfilesystemencoding, getprofile, getrecursionlimit, getrefcount, getsizeof, gettrace, hexversion, maxint, maxsize, maxunicode, meta_path, modules, path, path_hooks, path_importer_cache, platform, prefix, py3kwarning, pydebug, setcheckinterval, setdlopenflags, setprofile, setrecursionlimit, settrace, stderr, stdin, stdout, subversion, version, version_info, warnoptions ********************文档__doc__翻译:******************** This module provides access to some objects used or maintained by the 译:'该模块提供访问一些使用或维持the interpreter and to functions that interact strongly with the interpreter. 译:interpreter的对象和交互的功能与的interpreter. 译: Dynamic objects: 译:Dynamic对象: 译: argv -- command line arguments; argv[0] is the script pathname if known 译:argv - 命令行参数; argv的强烈[0]为脚本的路径名,如果known path -- module search path; path[0] is the script directory, else '' 译:path - 模块搜索路径,路径[ 0]为脚本目录,否则\x26#39;\x26#39; modules -- dictionary of loaded modules 译:modules - 字典装modules 译: displayhook -- called to show results in an interactive session 译:displayhook - 称为互动session excepthook -- called to handle any uncaught exception other than SystemExit 译:excepthook显示结果 - 调用来处理任何未捕获的异常比SystemExit To customize printing in an interactive session or to install a custom 译:定制印刷在交互式会话或安装custom top-level exception handler, assign other functions to replace these. 译:顶层的异常处理程序,分配等功能以取代these. 
译: exitfunc -- if sys.exitfunc exists, this routine is called when Python exits 译:exitfunc - 如果sys.exitfunc存在,这个例程被调用,当Python exits Assigning to sys.exitfunc is deprecated; use the atexit module instead. 译:分配sys.exitfunc的是不赞成使用的是atexit模块 - 标准instead. 译: stdin -- standard input file object; used by raw_input() and input() 译:stdin的输入文件对象使用raw_input函数()和输入() stdout -- standard output file object; used by the print statement 译:stdout - 标准输出文件对象所使用的打印statement stderr -- standard error object; used for error messages 译:stderr - 标准错误对象;用于错误messages By assigning other file objects (or objects that behave like files) 译:通过这些指定其他文件对象(或对象像文件) to these, it is possible to redirect all of the interpreter's I/O. 译:它可以重定向所有翻译的I / O. 译: last_type -- type of last uncaught exception 译:last_type的 - 类型去年未捕获exception last_value -- value of last uncaught exception 译:last_value的 - 值最后未捕获exception last_traceback -- traceback of last uncaught exception 译:last_traceback的 - 回溯去年未捕获exception These three are only available in an interactive session after a 译:这三只可在一个交互式会话回溯a traceback has been printed. 译:后已印。 译: exc_type -- type of exception currently being handled 译:exc_type - 异常类型的目前正在handled exc_value -- value of exception currently being handled 译:exc_value的 - 价值的例外目前正在handled exc_traceback -- traceback of exception currently being handled 译:exc_traceback的 - 回溯目前正在handled The function exc_info() should be used instead of these three, 译:的功能exc_info()应该用来代替这三个例外, because it is thread-safe. 译:,因为它是线程safe. 译: Static objects: 译:Static的对象: 译: maxint -- the largest supported integer (the smallest is -maxint-1) 译:maxint - 支持的最大整数(最小的是MAXINT:1) maxsize -- the largest supported length of containers. 译:maxsize - 支持的最大长度的containers. 
maxunicode -- the largest supported character 译:maxunicode - 最大支持character builtin_module_names -- tuple of module names built into this interpreter 译:builtin_module_names的 - 模块名称元组本interpreter version -- the version of this interpreter as a string 译:version建成 - 这个解释器的版本作为一个string version_info -- version information as a tuple 译:version_info - 版本信息为tuple hexversion -- version information encoded as a single integer 译:hexversion - 版本信息作为一个单一integer copyright -- copyright notice pertaining to this interpreter 译:copyright编码 - 版权声明有关interpreter platform -- platform identifier 译:platform - 平台identifier executable -- pathname of this Python interpreter 译:executable - 这Python interpreter prefix -- prefix used to find the Python library 译:prefix的路径名 - 所使用的前缀找到了Python library exec_prefix -- prefix used to find the machine-specific Python library 译:exec_prefix的 - 前缀用来寻找机器特定的Python library __stdin__ -- the original stdin; don't touch! 译:__stdin__的 - 原STDIN,不要碰! __stdout__ -- the original stdout; don't touch! 译:__stdout__ - 原来的标准输出,不要碰! __stderr__ -- the original stderr; don't touch! 译:__stderr__ - 原来stderr的,不要碰! __displayhook__ -- the original displayhook; don't touch! 译:__displayhook__ - 原来displayhook ,不要碰! __excepthook__ -- the original excepthook; don't touch! 译:__excepthook__ - 原来excepthook;不要碰!的 译: Functions: 译:Functions: 译: displayhook() -- print an object to the screen, and save it in __builtin__. 译:displayhook() - 打印到屏幕上,一个对象,并将其保存在的__ builtin__. 
_excepthook() -- print an exception and its traceback to sys.stderr 译:_excepthook() - 打印一个例外,其回溯到() - sys.stderr exc_info() -- return thread-safe information about the current exception 译:exc_info返回线程安全信息的的当前exception exc_clear() -- clear the exception state for the current thread 译:exc_clear() - 清除异常状态的的当前thread exit() -- exit the interpreter by raising SystemExit 译:exit() - 退出解释通过提高SystemExit getdlopenflags() -- returns flags to be used for dlopen() calls 译:getdlopenflags() - 返回标志用于对dlopen( )calls getprofile() -- get the global profiling function 译:getprofile() - 全球的分析function getrefcount() -- return the reference count for an object (plus one :-) 译:getrefcount() - 返回的引用计数的对象(加:-) getrecursionlimit() -- return the max recursion depth for the interpreter 译:getrecursionlimit() - 返回的最大递归深度的interpreter getsizeof() -- return the size of an object in bytes 译:getsizeof() - 返回的大小对象在bytes gettrace() -- get the global debug tracing function 译:gettrace() - 得到全局调试跟踪function setcheckinterval() -- control how often the interpreter checks for events 译:setcheckinterval的() - 控制频率解释器检查events setdlopenflags() -- set the flags to be used for dlopen() calls 译:setdlopenflags() - 设置对dlopen(可以使用)的标志calls setprofile() -- set the global profiling function 译:setprofile() - 在全球分析function setrecursionlimit() -- set the max recursion depth for the interpreter 译:setrecursionlimit的设置( ) - 设置最大递归深度的interpreter settrace() -- set the global debug tracing function 译:settrace()的 - 设置全局调试跟踪function **********************名称__name__:********************** sys
大概对写python代码有了初步印象了,
1.代码量相比较java来说,少了太多了
2.程序小点儿还好,如果太大了调试可能会比较麻烦,因为没有类型检查,稍微改一点儿东西,都要好好进行测试。对于测试,貌似适合测试驱动开发(TDD),不过真正有多少人这样开发呢?这样也有好处:好的代码和坏的代码区别明显,好的代码会严格按照规则来写,即使大项目也能保持代码简洁,坏的代码只会越写越差。
3.包相关的东西有点儿混乱,没有层级结构,眉毛胡子一把抓,感觉乱乱的
4.变量没有类型,函数也没有返回类型,纠结.....
5.python项目重构可能会比较麻烦,没有检查,没有能够减轻人力的好的编辑器(怀念eclipse),全部手打。。。
有得必有失,有失必有得