使用CDK进行相似度搜索

使用CDK进行相似度搜索

package com.founder.cdk;

import java.io.StringReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

import org.openscience.cdk.Molecule;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.fingerprint.Fingerprinter;
import org.openscience.cdk.io.MDLReader;
import org.openscience.cdk.similarity.Tanimoto;

public class CDKTest {

 /**
  * @param args
  */
 public static void main(String[] args) {
  
  // MySQL
  long t1 = System.currentTimeMillis();
  try {
   Class.forName("com.mysql.jdbc.Driver").newInstance();
   Connection con = Java.sql.DriverManager
     .getConnection(
       "jdbc:mysql://localhost/coocoo?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull",
       "root", "root");
   

   ResultSet results = null;
   String querySQL = "select id, structure from structure ";
   
   results = con.createStatement().executeQuery(querySQL);
 
   // dump out the results

   List<Molecule> list = new ArrayList<Molecule>();
   Fingerprinter fp = new Fingerprinter();
   BitSet bt = null;
   while (results.next()) {
    Long id = results.getLong("id");
    
    //根据结构数据生成分子对象
    StringReader mdl = new StringReader(results.getString("structure"));
    MDLReader cdkMDL = new MDLReader(mdl);
    Molecule molecule = new Molecule();
    cdkMDL.read(molecule);

    if (id == 1220) {
     bt = fp.getFingerprint(molecule);
    }
    list.add(molecule);
    
   } 
   System.out.println("size:=" + list.size());
   
   List<Molecule> resultList = new ArrayList<Molecule>();
        
         long t2 = System.currentTimeMillis();
         System.out.println("Thread: collection data in " + (t2 - t1) + " ms.");
         for (Molecule molecule : list) {
             try {
                 float coefficient = Tanimoto.calculate(fp.getFingerprint(molecule), bt);  //计算相似度
                 if (coefficient > 0.9) {
                  resultList.add(molecule);
                 }
             } catch (CDKException e) {

             }
         }
         long t3 = System.currentTimeMillis();
        
         System.out.println(resultList.size());
         System.out.println("Thread: Search in " + (t3 - t2) + " ms.");
        
   con.close();
  } catch (InstantiationException e) {
   e.printStackTrace();
  } catch (IllegalAccessException e) {
   e.printStackTrace();
  } catch (ClassNotFoundException e) {
   e.printStackTrace();
  } catch (SQLException e) {
   e.printStackTrace();
  } catch (CDKException e) {
   e.printStackTrace();
  }
  long t4 = System.currentTimeMillis();
        System.out.println("Thread: all in " + (t4 - t1) + " ms.");
 }

}

你可能感兴趣的:(使用CDK进行相似度搜索)