# 1. Compare the similarity between one image and the images in a directory.
#!C:/Python27
#coding=utf-8
import pytesseract
from pytesser import *
from PIL import Image,ImageEnhance,ImageFilter
import os
import fnmatch
import re,time
import urllib, random
#import hashlib
def getGray(image_file):
    """Return all pixel values of ``image_file`` as one flat list.

    Pixels are read with ``image_file.getpixel((x, y))`` row by row: the
    outer iteration runs over ``y`` in ``range(size[1])`` and the inner one
    over ``x`` in ``range(size[0])``.

    :param image_file: object exposing ``size`` (indexable, two entries are
        read) and ``getpixel((x, y))``.
    :return: list of the values returned by ``getpixel``.
    """
    columns = image_file.size[0]
    rows = image_file.size[1]
    return [
        image_file.getpixel((x, y))
        for y in range(rows)
        for x in range(columns)
    ]
def getAvg(ls):
    """Return the mean of the values in ``ls`` (the average gray level).

    The mean is computed with the plain ``/`` operator, so with integer
    inputs the result follows the interpreter's integer-division rules.

    :param ls: non-empty sequence of numbers.
    :return: ``sum(ls) / len(ls)``.
    :raises ZeroDivisionError: if ``ls`` is empty.
    """
    total = sum(ls)
    count = len(ls)
    return total / count
def getMH(a,b):
    """Count the positions at which ``a`` and ``b`` hold the same item.

    The inputs are compared position by position. Only positions present in
    both sequences are considered, so a shorter ``b`` no longer raises
    ``IndexError``; the missing positions simply do not count as matches.

    :param a: first sequence (e.g. a bit string of '0'/'1' characters).
    :param b: second sequence.
    :return: number of matching positions as an int.
    """
    return sum(1 for left, right in zip(a, b) if left == right)
def getImgHash(fne):
    """Build an average-based bit string describing the image at ``fne``.

    The image is opened, resized to 12x12 and converted to mode "L"
    (grayscale). The mean of all 144 pixel values is taken, then every pixel
    except the one-pixel border is compared to that mean: '1' when the pixel
    is greater than or equal to the mean, '0' otherwise. The 10x10 interior
    therefore yields a 100-character string, emitted row by row.

    :param fne: path (or other argument accepted by ``Image.open``) of the
        image to hash.
    :return: string made of '0' and '1' characters.
    """
    image_file = Image.open(fne)
    image_file = image_file.resize((12, 12))
    image_file = image_file.convert("L")
    avg = getAvg(getGray(image_file))
    width = image_file.size[0]
    height = image_file.size[1]
    # Collect the bits in a list and join once instead of growing a string.
    bits = []
    for h in range(1, height - 1):  # skip the top and bottom border rows
        for w in range(1, width - 1):  # skip the left and right border columns
            bits.append('1' if image_file.getpixel((w, h)) >= avg else '0')
    return ''.join(bits)
a=getImgHash(".//testpic//001n.bmp")#图片地址自行替换
files = os.listdir(".//testpic")#图片文件夹地址自行替换
for file in files:
b=getImgHash(".//testpic//"+str(file))
compare=getMH(a,b)
print file,u'相似度',str(compare)+'%'
#!C:/Python27
#coding=utf-8
"""
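1. Capture the screen and obtain the CAPTCHA image;
2. Crop the CAPTCHA into 4 small images;
3. Enlarge each of the 4 images to 256*256 pixels;
4. Look up each small image in the model directory for similar images (4 times);
5. Assemble the results into a string ---> the 4-character CAPTCHA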
"""
import os ,sys
import fnmatch
import re,time
import urllib, random
import pytesseract
from pytesser import *
from PIL import Image,ImageDraw
def GetVerficode(): #下载验证码图片
for i in range(1,101):
url = 'https://cas.sf-express.com/cas/imgcode?a=0.7860542547321294'
print "download", i
file("./code/%04d.jpg" % random.randrange(10000), "wb").write(urllib.urlopen(url).read())
def GetImg():
    """Bind the path of one sample CAPTCHA image to a local name.

    The function has no other effect and returns None.
    """
    ImgPath = ".//code//9911.jpg"
def CutCrop():#分隔验证图片
"""global data 全局变量在整个函数中有效"""
ImgPath = (".//code//")
j = 1000
for f in os.listdir(ImgPath):
if f.endswith(".jpg"):
print f
img = Image.open(ImgPath+f).convert('L')
print img.size
w, h = img.size
#rowheight = h // rownum
#colwidth = w // colnum
#imgry.show()
for i in range(4):
x = 10 + i*24 #验证码的x,y坐标
y = 6
img.crop((x-4, y,x+6, y+14)).save("font/%d.bmp" % j)
print "j=",j
j += 1
def fixed_size():
"""按照固定尺寸放大处理4图片"""
dirpath =(".//font//")
j = 10000
for imgfile in os.listdir(dirpath):
print imgfile
im = Image.open(dirpath+imgfile)
#im.show()
size = (256, 256)
im2 =im.resize(size).convert('RGB')
out = im2.resize(size,Image.ANTIALIAS)
out.save(dirpath+'%d.bmp' % j)
print u"\n按固定尺寸放大*4张图片,处理已完成"
j += 1
"""Image comparison helpers: begin"""
def getGray(image_file):
    """Return all pixel values of ``image_file`` as one flat list.

    Pixels are read with ``image_file.getpixel((x, y))`` row by row: the
    outer iteration runs over ``y`` in ``range(size[1])`` and the inner one
    over ``x`` in ``range(size[0])``.

    :param image_file: object exposing ``size`` (indexable, two entries are
        read) and ``getpixel((x, y))``.
    :return: list of the values returned by ``getpixel``.
    """
    columns = image_file.size[0]
    rows = image_file.size[1]
    return [
        image_file.getpixel((x, y))
        for y in range(rows)
        for x in range(columns)
    ]
def getAvg(ls):
    """Return the mean of the values in ``ls`` (the average gray level).

    The mean is computed with the plain ``/`` operator, so with integer
    inputs the result follows the interpreter's integer-division rules.

    :param ls: non-empty sequence of numbers.
    :return: ``sum(ls) / len(ls)``.
    :raises ZeroDivisionError: if ``ls`` is empty.
    """
    total = sum(ls)
    count = len(ls)
    return total / count
def getMH(a,b):
    """Count the positions at which ``a`` and ``b`` hold the same item.

    The inputs are compared position by position. Only positions present in
    both sequences are considered, so a shorter ``b`` no longer raises
    ``IndexError``; the missing positions simply do not count as matches.

    :param a: first sequence (e.g. a bit string of '0'/'1' characters).
    :param b: second sequence.
    :return: number of matching positions as an int.
    """
    return sum(1 for left, right in zip(a, b) if left == right)
def getImgHash(fne):
    """Build an average-based bit string describing the image at ``fne``.

    The image is opened, resized to 12x12 and converted to mode "L"
    (grayscale). The mean of all 144 pixel values is taken, then every pixel
    except the one-pixel border is compared to that mean: '1' when the pixel
    is greater than or equal to the mean, '0' otherwise. The 10x10 interior
    therefore yields a 100-character string, emitted row by row.

    :param fne: path (or other argument accepted by ``Image.open``) of the
        image to hash.
    :return: string made of '0' and '1' characters.
    """
    image_file = Image.open(fne)
    image_file = image_file.resize((12, 12))
    image_file = image_file.convert("L")
    avg = getAvg(getGray(image_file))
    width = image_file.size[0]
    height = image_file.size[1]
    # Collect the bits in a list and join once instead of growing a string.
    bits = []
    for h in range(1, height - 1):  # skip the top and bottom border rows
        for w in range(1, width - 1):  # skip the left and right border columns
            bits.append('1' if image_file.getpixel((w, h)) >= avg else '0')
    return ''.join(bits)
"""Image comparison helpers: end"""
def FindImg():
"""
遍历Pic 目录下的图片,去路径中找图片后缀为PNG的图片,对比
"""
file_dir = (".\\BigImg")
L=[]
for root,dirs,files in os.walk(file_dir):
#print root,dirs,files #路径地址、文件夹名、文件名
for file in files:
L.append(os.path.join(root,file))
#if imgfile.endswith(".bmp"):
#print L ,len(L)
file_dir = (".\\result")
M=[]
for root, dirs, files in os.walk(file_dir):
for file in files:
if os.path.splitext(file)[1] == '.bmp':
#print os.path.join(root, file)
im2 = Image.open(os.path.join(root, file))
#im2.show()
M.append(os.path.join(root, file))
#print M,len(M)
"""外层循环4次,内层循环多次"""
#i = len(L)
for i in L :
print "\n",i ,"\n"
a=getImgHash(i)
for j in M:
#im2 = Image.open(j)
#print j
b=getImgHash(j)
compare=getMH(a,b)
print j,u'相似度',str(compare)+'%'
# Pipeline stages, in order; only the stages that are not commented out run.
# Download the images
#GetVerficode()
# Crop the images
#CutCrop()
# Enlarge the images
#fixed_size()
# Compare the images
FindImg()