## TODO:
小文本:采用 PHP 自带的文本相似度比较(similar_text)即可:
// SimpleCompareTextSimilarity performs a plain string comparison of prev and
// newUpload via php2go.SimilarText; it is meant for short inputs.
//
// It returns the value SimilarText stores through its pointer argument
// (presumably the similarity percentage), followed by SimilarText's own
// integer result.
func SimpleCompareTextSimilarity(prev, newUpload string) (float64, int) {
	var percent float64
	matched := php2go.SimilarText(prev, newUpload, &percent)
	return percent, matched
}
大文本:采用基于海明距离的 simhash 算法:
// CompareTextSimilarity checks how similar two strings are using simhash.
//
// Each input is turned into a word feature set and hashed; the two hashes are
// then passed to simhash.Compare. The float64 result is always 0.0; the int
// result is the value returned by simhash.Compare.
func CompareTextSimilarity(prev, newUpload string) (float64, int) {
	prevFeatures := simhash.NewWordFeatureSet([]byte(prev))
	prevHash := simhash.Simhash(prevFeatures)

	newFeatures := simhash.NewWordFeatureSet([]byte(newUpload))
	newHash := simhash.Simhash(newFeatures)

	return 0.0, int(simhash.Compare(prevHash, newHash))
}
// BatchComparePicSimilarity compares two batches of image files.
//
// Every path in filePathSlice1 and then every path in filePathSlice2 is hashed
// with GetImgHash(funName, path); the two hash lists are handed to
// CompareImgHashes together with threshold.
//
// If hashing any file fails, it returns (0, 0.0, err) immediately. Otherwise
// the int result is always -1 and the float64/error results are whatever
// CompareImgHashes returned.
func BatchComparePicSimilarity(funName string, filePathSlice1 []string, filePathSlice2 []string, threshold int) (int, float64, error) {
	// hashAll hashes each path in order and stops at the first failure.
	hashAll := func(paths []string) ([]*goimagehash.ImageHash, error) {
		hashes := make([]*goimagehash.ImageHash, 0, len(paths))
		for idx := range paths {
			h, err := GetImgHash(funName, paths[idx])
			if err != nil {
				return nil, err
			}
			hashes = append(hashes, h)
		}
		return hashes, nil
	}

	prevHashes, err := hashAll(filePathSlice1)
	if err != nil {
		return 0, 0.0, err
	}
	newHashes, err := hashAll(filePathSlice2)
	if err != nil {
		return 0, 0.0, err
	}

	ratio, err := CompareImgHashes(prevHashes, newHashes, threshold)
	return -1, ratio, err
}
// GetImgHash decodes the image at filePath and computes a perceptual hash.
//
// filePath must exist (checked with utils.IsFileOrDirExists) and end in
// ".jpeg", ".jpg" or ".png"; the extension comparison is case-sensitive.
// funcName selects the algorithm: "a" uses goimagehash.AverageHash and "d"
// uses goimagehash.DifferenceHash.
//
// NOTE(review): for any other funcName the function returns the empty hash
// allocated by new(goimagehash.ImageHash) together with a nil error. That
// behavior is preserved for existing callers, but it looks unintended.
//
// It returns an error when the file is missing, cannot be opened or decoded,
// has an unsupported extension, or when the hash function itself fails.
func GetImgHash(funcName string, filePath string) (*goimagehash.ImageHash, error) {
	suffix := path.Ext(filePath)
	if !utils.IsFileOrDirExists(filePath) {
		return nil, fmt.Errorf("[%s]文件不存在", filePath)
	}

	var img image.Image
	switch suffix {
	case ".jpeg", ".jpg", ".png":
		file, err := os.Open(filePath)
		if err != nil {
			return nil, err
		}
		// Release the descriptor once decoding is done; the original never
		// closed the file.
		defer file.Close()

		if suffix == ".png" {
			img, err = png.Decode(file)
		} else {
			img, err = jpeg.Decode(file)
		}
		if err != nil {
			return nil, err
		}
	default:
		return nil, errors.New("图片格式错误,现暂仅支持jpeg,jpg,png结尾的图片")
	}

	imgHash := new(goimagehash.ImageHash)
	var err error
	switch funcName {
	case "a":
		imgHash, err = goimagehash.AverageHash(img)
	case "d":
		imgHash, err = goimagehash.DifferenceHash(img)
	}
	if err != nil {
		return nil, err
	}
	return imgHash, nil
}
// CompareImgHashes reports what fraction of newHashes has a close match in
// prevHashes.
//
// prevHashes holds the hashes that already exist; newHashes holds the hashes
// of the newly uploaded items to be matched. A new hash counts as matched when
// its Distance to at least one previous hash is <= threshold; the search for
// that hash stops at the first match.
//
// The result is matched/len(newHashes). When newHashes is empty the result is
// 0.0 rather than the NaN that dividing 0 by 0 would produce. Any error from
// Distance is returned immediately with a 0.0 ratio.
func CompareImgHashes(prevHashes, newHashes []*goimagehash.ImageHash, threshold int) (float64, error) {
	if len(newHashes) == 0 {
		// Nothing to match: avoid 0/0 == NaN.
		return 0.0, nil
	}

	matched := 0
	for _, candidate := range newHashes {
		for _, known := range prevHashes {
			distance, err := candidate.Distance(known)
			if err != nil {
				return 0.0, err
			}
			if distance <= threshold {
				matched++
				break
			}
		}
	}
	return float64(matched) / float64(len(newHashes)), nil
}
// CompareVideosWithImg compares two video files through the image hashes of
// their sampled frames.
//
// Frame hashes for filePath1 and then filePath2 are obtained with
// GetVideoFramesWithImg(path, funcName) and passed, in that order, to
// pic.CompareImgHashes along with threshold. If either hashing step fails the
// function returns 0.0 and that error.
func CompareVideosWithImg(filePath1, filePath2 string, funcName string, threshold int) (float64, error) {
	firstHashes, err := GetVideoFramesWithImg(filePath1, funcName)
	if err != nil {
		return 0.0, err
	}

	secondHashes, err := GetVideoFramesWithImg(filePath2, funcName)
	if err != nil {
		return 0.0, err
	}

	return pic.CompareImgHashes(firstHashes, secondHashes, threshold)
}
// GetVideoFramesWithImg returns the image hashes of frames sampled from the
// video at filePath.
//
// The sampling level comes from getVideoFrameLevel(filePath); the hashing
// itself is delegated to getVideoFramesWithGoimage. If the level cannot be
// determined, it returns nil and that error.
func GetVideoFramesWithImg(filePath string, funcName string) ([]*goimagehash.ImageHash, error) {
	// Original note: when the level is "less" the default is 10
	// (presumably the number of frames sampled — confirm against
	// VIDEO_LEVEL_COUNT_ARRAY).
	level, err := getVideoFrameLevel(filePath)
	if err == nil {
		return getVideoFramesWithGoimage(filePath, funcName, level)
	}
	return nil, err
}
// getVideoFrameLevel maps the size of the file at filePath to a frame-sampling
// level.
//
// The size reported by utils.GetFileSize is compared against VIDEO_SIZE_10M,
// VIDEO_SIZE_100M and VIDEO_SIZE_1G in turn, yielding VIDEO_FRAME_LEVEL_LESS,
// _MID, _UPMID, or _MOST for anything not below VIDEO_SIZE_1G. If the size
// cannot be read, it returns 0 and that error.
func getVideoFrameLevel(filePath string) (int, error) {
	size, err := utils.GetFileSize(filePath)
	if err != nil {
		return 0, err
	}

	switch {
	case size < VIDEO_SIZE_10M:
		return VIDEO_FRAME_LEVEL_LESS, nil
	case size < VIDEO_SIZE_100M:
		return VIDEO_FRAME_LEVEL_MID, nil
	case size < VIDEO_SIZE_1G:
		return VIDEO_FRAME_LEVEL_UPMID, nil
	default:
		return VIDEO_FRAME_LEVEL_MOST, nil
	}
}
// getVideoFramesWithGoimage samples frames from the video at filePath and
// returns one goimagehash hash per sampled frame.
//
// level indexes VIDEO_LEVEL_COUNT_ARRAY to decide how many frames to capture;
// the frames are spread evenly over the video's frame count. funName selects
// the hash algorithm: "a" uses goimagehash.AverageHash and "d" uses
// goimagehash.DifferenceHash (added to match GetImgHash).
//
// NOTE(review): for any other funName an empty hash from
// new(goimagehash.ImageHash) is appended for each frame, as before.
//
// On failure it returns the hashes collected so far together with the error;
// if the video cannot be opened it returns nil and the error.
func getVideoFramesWithGoimage(filePath string, funName string, level int) ([]*goimagehash.ImageHash, error) {
	picCount := int(VIDEO_LEVEL_COUNT_ARRAY[level])
	result := make([]*goimagehash.ImageHash, 0)

	// Load the video.
	vc, err := gocv.VideoCaptureFile(filePath)
	if err != nil {
		return nil, err
	}
	// The capture wraps native resources; the original never released them.
	defer vc.Close()

	// total is the number of frames in the whole video.
	total := vc.Get(gocv.VideoCaptureFrameCount)

	for j := 0; j < picCount; j++ {
		// Position of the j-th evenly spaced sample. The original derived the
		// same value as (time/duration)*total with duration = frames/fps,
		// which turns into NaN when the FPS is reported as 0; computing it
		// from the frame count alone avoids that and the accumulated float
		// error of a running time offset.
		pos := float64(j) * total / float64(picCount)
		vc.Set(gocv.VideoCapturePosFrames, pos)

		img := gocv.NewMat()
		// NOTE(review): the boolean result of Read is still ignored, as in
		// the original; a failed read leaves img empty.
		vc.Read(&img)
		frame, err := img.ToImage()
		// The Mat is not used after this point, so release it on every
		// iteration (no defer: it would pile up until the function returns).
		img.Close()
		if err != nil {
			return result, err
		}

		imageHash := new(goimagehash.ImageHash)
		switch funName {
		case "a":
			imageHash, err = goimagehash.AverageHash(frame)
		case "d":
			imageHash, err = goimagehash.DifferenceHash(frame)
		}
		if err != nil {
			return result, err
		}
		result = append(result, imageHash)
	}
	return result, nil
}