对于亿级数据量的模糊匹配需求,传统的SQL查询往往难以胜任。一种解决方法是利用Hive进行匹配;另一种不妨试试Apache Commons IO的FileUtils工具类,具体处理方法是:1.将要处理的数据导出为文件;2.利用FileUtils类进行读写操作,每读一行就进行一次业务逻辑判断,若符合自己的需求,则将该行以追加方式写入到另一个文件中。只要磁盘IO给力,分分钟筛选完毕。
FileUtils.writeStringToFile(file, sb.toString(), "UTF-8",true); //true参数表示追加内容
package com.rzx.update.job;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * Keyword filter over an exported company-business data file.
 *
 * <p>Each line of the input file is checked against two keyword lists. For every
 * keyword a line contains, one record is appended to the output file that belongs
 * to that list (primary keywords go to {@code third_payee_data.dat}, secondary
 * keywords go to {@code third_payee_data1.dat}).
 *
 * <p>The class is registered as a Spring component and is also runnable as a
 * JUnit 4 test through {@link #execute()}.
 */
@Component("fileDataThirdPayeeJob")
@RunWith(SpringJUnit4ClassRunner.class) // run with JUnit 4
@ContextConfiguration(locations = { "classpath:applicationContext.xml" }) // load the Spring configuration file
public class FileDataThirdPayeeJob {

    private static final Logger logger = LoggerFactory.getLogger(FileDataThirdPayeeJob.class);

    /** Character encoding used for both reading and writing. */
    private static final String ENCODING = "UTF-8";

    /** Input file: one company record per line. */
    private static final String SOURCE_PATH = "/home/full_amount/company_business.dat";

    /** Output file for lines matching a primary keyword. */
    private static final String PRIMARY_OUTPUT_PATH = "/home/yhj/third_payee_data/third_payee_data.dat";

    /** Output file for lines matching a secondary keyword. */
    private static final String SECONDARY_OUTPUT_PATH = "/home/yhj/third_payee_data/third_payee_data1.dat";

    /** Primary keywords: prepaid card, payment, acquiring, acceptance, bank card, POS. */
    private static final String[] PRIMARY_KEYWORDS = {"预付卡","支付","收单","受理","银行卡","POS"};

    /** Secondary keywords, matched independently of the primary list. */
    private static final String[] SECONDARY_KEYWORDS = {"智能卡","代理服务","服务卡","结算","充值","IC卡","一卡通","电子卡","磁卡","终端","转账","充值卡","缴费","金融设备","代缴","读卡机","收款机","终端机","记帐","磁条卡ICO","现钞","收转","吸储","网上支付","加油卡","就餐卡","健身卡","小额","售卡","代付","帐单","P2P","网贷"};

    /**
     * Reads the input file and appends every keyword match to the corresponding
     * output file.
     *
     * <p>Both output files are truncated at the start of the run, so running the
     * job again replaces the previous results instead of accumulating duplicates.
     *
     * @throws IOException if the input file cannot be read or an output file
     *         cannot be written
     */
    @Test
    public void execute() throws IOException {
        logger.info("start读取文件================");
        // readLines materializes the whole file as a List; for inputs that do not
        // fit in the heap, FileUtils.lineIterator is the streaming alternative.
        List<String> lines = FileUtils.readLines(new File(SOURCE_PATH), ENCODING);
        logger.info("start1================");

        File file = new File(PRIMARY_OUTPUT_PATH);
        File file1 = new File(SECONDARY_OUTPUT_PATH);

        // Start from empty files: everything below is written in append mode.
        FileUtils.writeStringToFile(file, "", ENCODING);
        FileUtils.writeStringToFile(file1, "", ENCODING);

        for (String companyInfo : lines) {
            logger.info(companyInfo);
            appendMatches(file, companyInfo, PRIMARY_KEYWORDS);
            logger.info("end1================");
            logger.info("start1================");
            appendMatches(file1, companyInfo, SECONDARY_KEYWORDS);
            logger.info("end2================");
        }
    }

    /**
     * Appends one record per matching keyword to {@code target}.
     *
     * <p>A record is the original line, a tab, the keyword wrapped in vertical-tab
     * characters, a tab and a newline. Nothing is written when no keyword matches.
     *
     * @param target      file to append to
     * @param companyInfo the input line being examined
     * @param keywords    keywords to look for; every element is checked
     * @throws IOException if writing to {@code target} fails
     */
    private static void appendMatches(File target, String companyInfo, String[] keywords) throws IOException {
        StringBuilder matches = new StringBuilder();
        for (String keyword : keywords) {
            if (companyInfo.contains(keyword)) {
                matches.append(companyInfo).append("\t\u000B").append(keyword).append("\u000B\t\n");
            }
        }
        if (matches.length() > 0) {
            // append = true: keep the records written for earlier lines.
            FileUtils.writeStringToFile(target, matches.toString(), ENCODING, true);
        }
    }
}