根据现成的开源项目 pytesser（http://code.google.com/p/pytesser/）改写。
在 Windows 上用 easy_install 安装不上，看了下源码，发现代码很少，于是就想自己改写一下。
新增了对网络图片直接解析的支持。
# -*- coding: utf-8 -*-
"""Thin wrapper around the ``tesseract`` command-line OCR program.

Adapted from the pytesser project, with one addition: an image can be
fetched from a URL and recognised directly.
"""

import contextlib
import os
import shutil
import subprocess
import tempfile
import time  # no longer used below; kept so the module namespace is unchanged

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen() into urllib.request.
    import urllib.request as urllib2

tesseract_exe_name = 'tesseract'  # Name of executable to be called at command line
temp_dir_name = tempfile.gettempdir()  # Directory in which scratch files are created


def call_tesseract(input_filename, output_filename):
    """Run tesseract on ``input_filename`` and wait for it to finish.

    Args:
        input_filename: Path of the image handed to tesseract.
        output_filename: Output base name handed to tesseract; callers in
            this module read the result from ``output_filename + '.txt'``.

    Returns:
        The exit status of the tesseract process.

    Raises:
        OSError: If the executable cannot be started.
    """
    args = [tesseract_exe_name, input_filename, output_filename]
    return subprocess.call(args)


def image_file_to_string(filename):
    """Recognise the text in the image file ``filename``.

    Args:
        filename: Path of the image to run through tesseract.

    Returns:
        The contents of tesseract's text output, or None when that output
        contains the substring ``"Error"``.

    Raises:
        IOError: If tesseract did not produce an output file.
    """
    # A private scratch directory gives every call its own output path, so
    # concurrent or rapid successive calls cannot clobber each other.
    work_dir = tempfile.mkdtemp(dir=temp_dir_name)
    output_base = os.path.join(work_dir, 'out')
    try:
        call_tesseract(filename, output_base)
        with open(output_base + '.txt') as handle:
            text = handle.read()
        # NOTE(review): this also rejects genuine text that happens to
        # contain "Error"; kept as-is because callers may rely on it.
        if 'Error' in text:
            return None
        return text
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)


def image_url_to_string(url):
    """Download the image at ``url`` and recognise the text in it.

    Args:
        url: Address of the image; fetched with a 3 second timeout.

    Returns:
        The recognised text, or None when the response's Content-Type does
        not start with ``image`` (or is missing) or when
        ``image_file_to_string`` returns None.
    """
    with contextlib.closing(urllib2.urlopen(url, None, 3)) as response:
        content_type = response.headers.get('Content-Type') or ''
        if not content_type.startswith('image'):
            return None
        payload = response.read()

    # mkstemp creates the file atomically under a unique name.
    handle_fd, image_file_path = tempfile.mkstemp(suffix='in', dir=temp_dir_name)
    try:
        with os.fdopen(handle_fd, 'wb') as image_file:
            image_file.write(payload)
        return image_file_to_string(image_file_path)
    finally:
        if os.path.exists(image_file_path):
            os.remove(image_file_path)


if __name__ == '__main__':
    # Demo run; the path below is specific to the original author's machine.
    print(image_file_to_string('D:\\pytesser_v0.0.1\\fonts_test.png'))