查找匹配度最高的字符串(转)

要求在3万多个字符串中，找出与给定字符串匹配度最高的字符串。

匹配度计算规则:

匹配度 = 两个字符串的最长公共子串的长度 / 两个字符串中较长者的长度。

 

代码如下:

定义的一个主体类:

/// <summary>
/// Scoring callback: receives one candidate string and returns the score
/// assigned to it.
/// </summary>
/// <param name="value">The candidate string to score.</param>
/// <returns>The score computed for <paramref name="value"/>.</returns>
public delegate decimal CompareCondition(String value);

/// <summary>
/// Holds a set of candidate strings and ranks them with a caller-supplied
/// scoring function.
/// </summary>
public class StringCompare
{
    private List<IntraString> intraList = new List<IntraString>();

    /// <summary>
    /// Wraps every string of <paramref name="strlist"/> in an
    /// <see cref="IntraString"/>, preserving the input order.
    /// </summary>
    /// <param name="strlist">The candidate strings.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="strlist"/> is null.
    /// </exception>
    public StringCompare(List<String> strlist)
    {
        if (strlist == null)
        {
            throw new ArgumentNullException("strlist");
        }

        foreach (String candidate in strlist)
        {
            intraList.Add(new IntraString(candidate));
        }
    }

    /// <summary>
    /// Scores every candidate with <paramref name="condition"/> and sorts the
    /// candidates by that score in ascending order.
    /// </summary>
    /// <param name="condition">The scoring function applied to each candidate.</param>
    /// <returns>
    /// The internal list itself (not a copy), sorted from lowest to highest
    /// score, so the best-scoring candidate is the last element.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="condition"/> is null.
    /// </exception>
    public List<IntraString> Compare(CompareCondition condition)
    {
        if (condition == null)
        {
            throw new ArgumentNullException("condition");
        }

        foreach (IntraString candidate in intraList)
        {
            candidate.Compare(condition);
        }

        // decimal.CompareTo yields the same negative/zero/positive ordering as
        // a hand-written three-way comparison on the scores.
        intraList.Sort(delegate(IntraString a, IntraString b)
        {
            return a.CompareResult.CompareTo(b.CompareResult);
        });

        return intraList;
    }
}

/// <summary>
/// Pairs a string with the score most recently computed for it.
/// </summary>
public class IntraString
{
    private String value;
    private decimal compareResult;

    /// <summary>Creates a wrapper around <paramref name="value"/>.</summary>
    /// <param name="value">The string to wrap.</param>
    public IntraString(String value)
    {
        this.value = value;
    }

    /// <summary>Gets or sets the wrapped string.</summary>
    public String Value
    {
        get { return value; }
        set { this.value = value; }
    }

    /// <summary>
    /// Gets or sets the stored score. It holds the default value 0 until
    /// <see cref="Compare"/> or the setter assigns one.
    /// </summary>
    public decimal CompareResult
    {
        get { return compareResult; }
        set { compareResult = value; }
    }

    /// <summary>
    /// Invokes <paramref name="condition"/> on <see cref="Value"/> and stores
    /// the returned score in <see cref="CompareResult"/>.
    /// </summary>
    /// <param name="condition">The scoring function to apply.</param>
    public void Compare(CompareCondition condition)
    {
        CompareResult = condition(Value);
    }
}


比较规则的实现:

/// <summary>
/// Defines the comparison condition: scores <paramref name="value"/> against
/// a newly generated GUID rendered as text with the dashes removed.
/// </summary>
/// <param name="value">The candidate string to score.</param>
/// <returns>
/// The length of the longest common substring of the two strings divided by
/// the length of the longer string; a value between 0 and 1.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="value"/> is null.
/// </exception>
private static decimal Compare(String value)
{
    if (value == null)
    {
        throw new ArgumentNullException("value");
    }

    // The reference string is regenerated on every call, so two calls with the
    // same value generally return different scores.
    string reference = Guid.NewGuid().ToString().Replace("-", "");

    // The GUID text is never empty, so the divisor below is always positive.
    int longerLength = Math.Max(reference.Length, value.Length);

    int sameCount = LongestCommonSubstringLength(value, reference);
    return (decimal)sameCount / longerLength;
}

/// <summary>
/// Returns the length of the longest run of characters that occurs
/// contiguously in both <paramref name="a"/> and <paramref name="b"/>.
/// Characters are compared by ordinal value.
/// </summary>
private static int LongestCommonSubstringLength(string a, string b)
{
    if (a.Length == 0 || b.Length == 0)
    {
        return 0;
    }

    int best = 0;

    // Rolling rows of the dynamic-programming table: after processing a[i - 1],
    // current[j] is the length of the common run ending at a[i - 1] and b[j - 1].
    int[] previous = new int[b.Length + 1];
    int[] current = new int[b.Length + 1];

    for (int i = 1; i <= a.Length; i++)
    {
        for (int j = 1; j <= b.Length; j++)
        {
            if (a[i - 1] == b[j - 1])
            {
                current[j] = previous[j - 1] + 1;
                if (current[j] > best)
                {
                    best = current[j];
                }
            }
            else
            {
                current[j] = 0;
            }
        }

        // Reuse the arrays: every slot from index 1 on is overwritten in the
        // next row, and slot 0 always stays 0.
        int[] swap = previous;
        previous = current;
        current = swap;
    }

    return best;
}

转载于:https://www.cnblogs.com/yylqinghao/archive/2010/06/24/1764228.html

你可能感兴趣的:(java)