最近在做一个小功能:通过代码读取 DataHub 中的数据,并把读取到的数据输出成数据文件。因为数据量较大,所以考虑使用多线程来运行。下面是主要代码。
线程启动方法
/**
 * Launches one {@code ThreadReadDataHub} worker per shard and logs each launch.
 *
 * <p>Two workers are created: shard id "0" named "ThreadShardOne" and shard id "1"
 * named "ThreadShardTwo". The second launch is additionally echoed to standard output.
 *
 * @throws Exception declared by the signature; nothing in this body throws a checked exception
 */
public void start() throws Exception {
    final String firstWorkerName = "ThreadShardOne";
    final ThreadReadDataHub firstWorker = new ThreadReadDataHub(firstWorkerName, "0");
    firstWorker.start();
    logger.info("线程》" + firstWorkerName + "已经启动");

    final String secondWorkerName = "ThreadShardTwo";
    final ThreadReadDataHub secondWorker = new ThreadReadDataHub(secondWorkerName, "1");
    secondWorker.start();
    // The same text goes to the logger and to stdout for the second worker.
    final String secondStarted = "线程》" + secondWorkerName + "已经启动";
    logger.info(secondStarted);
    System.out.println(secondStarted);

    // A third worker (shard id "2", name "ThreadShardThree") is intentionally not started.
}
读取datahub代码
/**
 * Consumes the records of one DataHub shard in a loop and dumps them to text files.
 *
 * <p>Every record is flattened into a single line (ten fields joined with {@code common})
 * and buffered in memory. The buffer is handed to {@code TextUtils.writeText} when it holds
 * {@code maxSize} lines, and also when a poll returns no records while lines are pending.
 * The consumed offset is committed every 100 records and after every empty poll. After an
 * empty poll the thread sleeps for one second before polling again.
 *
 * <p>The loop ends normally only when the thread is interrupted during that sleep; the
 * interrupt flag is restored before returning.
 *
 * @param maxSize maximum number of lines per output file; {@code null} or a non-positive
 *                value falls back to 30000
 * @param shardId id of the shard to consume
 * @throws Exception if the subscription goes offline, another consumer takes over the same
 *                   shard of the subscription, or any other unexpected error occurs; the
 *                   original exception is attached as the cause
 */
public void start(Integer maxSize, String shardId) throws Exception {
    long start = System.currentTimeMillis();
    List<String> result = new ArrayList<>();
    // Default batch size is 30000 lines per file.
    final int batchSize = (maxSize == null || maxSize <= 0) ? 30000 : maxSize;
    boolean bExit = false;
    GetTopicResult topicResult = client.getTopic(projectName, topicName);
    // Initialise the offset context first.
    OffsetContext offsetCtx = client.initOffsetContext(projectName, topicName, subId, shardId);
    String cursor; // cursor from which consumption starts
    if (!offsetCtx.hasOffset()) {
        // No offset stored yet: start from the oldest data currently in the shard.
        cursor = client.getCursor(projectName, topicName, shardId, CursorType.OLDEST).getCursor();
    } else {
        // Otherwise continue right after the last consumed offset.
        cursor = client.getNextOffsetCursor(offsetCtx).getCursor();
    }
    long recordNum = 0L;
    int limit = 30000;
    while (!bExit) {
        try {
            GetRecordsResult recordResult = client.getRecords(projectName, topicName, shardId, cursor, limit,
                    topicResult.getRecordSchema());
            List<RecordEntry> records = recordResult.getRecords();
            if (records.isEmpty()) {
                if (!result.isEmpty()) {
                    // Nothing new arrived: write out whatever is still buffered.
                    start = flushToFile(result, shardId, start);
                }
                // Report the last consumed offset.
                client.commitOffset(offsetCtx);
                logger.info("sleep 1s and continue consume records! shard id:" + shardId);
                // Back off before polling again; without this an idle shard is polled in a tight loop.
                Thread.sleep(1000);
            } else {
                for (RecordEntry record : records) {
                    result.add(
                            record.getBigint("111") + common
                                    + record.getString("222") + common
                                    + record.getBigint("333") + common
                                    + record.getBigint("444") + common
                                    + record.getDouble("555") + common
                                    + record.getBigint("666") + common
                                    + record.getBigint("777") + common
                                    + record.getBigint("888") + common
                                    + record.getBigint("999") + common
                                    + record.getString("1212")
                    );
                    if (result.size() == batchSize) {
                        start = flushToFile(result, shardId, start);
                    }
                    // Commit the offset once every 100 processed records.
                    // NOTE(review): a committed offset may cover records that are still only in
                    // the in-memory buffer; presumably a crash before the next flush loses them
                    // — confirm whether that is acceptable.
                    offsetCtx.setOffset(record.getOffset());
                    recordNum++;
                    if (recordNum % 100 == 0) {
                        client.commitOffset(offsetCtx);
                    }
                }
                cursor = recordResult.getNextCursor();
            }
        } catch (SubscriptionOfflineException e) {
            // Subscription is offline: give up.
            logger.error("订阅下线,退出"+e.getMessage());
            throw new Exception(e);
        } catch (OffsetResetedException e) {
            // Offset was reset: refresh the offset context and continue from the new position.
            client.updateOffsetContext(offsetCtx);
            cursor = client.getNextOffsetCursor(offsetCtx).getCursor();
            logger.error("点位被重置,更新offset上下文"+e.getMessage());
        } catch (OffsetSessionChangedException e) {
            // Another consumer is consuming the same shard of this subscription: give up.
            logger.error(" 其他consumer同时消费了该订阅下的相同shard,退出"+e.getMessage());
            throw new Exception(e);
        } catch (InvalidCursorException ex) {
            // Invalid or expired cursor: re-position to the latest data and keep consuming.
            // NOTE(review): jumping to LATEST presumably skips everything between the expired
            // cursor and now — confirm this is intended.
            GetCursorResult cursorRs = client.getCursor(projectName, topicName, shardId, CursorType.LATEST);
            cursor = cursorRs.getCursor();
        } catch (InterruptedException e) {
            // Interrupted while idle: restore the flag and leave the loop. The buffer has
            // already been flushed and the offset committed in the empty-poll branch.
            Thread.currentThread().interrupt();
            bExit = true;
            logger.error("线程被中断,退出, shardId=" + shardId);
        } catch (Exception e) {
            logger.error(" 异常退出"+e.getMessage());
            throw new Exception(e);
        }
    }
}

/**
 * Writes the buffered lines to a file, empties the buffer and logs the elapsed time.
 *
 * @param result  buffered lines; cleared before returning
 * @param shardId shard id, used only in log output
 * @param start   timestamp (ms) at which the current batch started
 * @return the timestamp (ms) from which the next batch's elapsed time is measured
 */
private long flushToFile(List<String> result, String shardId, long start) {
    if (!TextUtils.writeText(result, fileName, filePath)) {
        // The buffer is cleared below either way, so at least make the failure visible.
        logger.error("写入文件失败, shardId=" + shardId);
    }
    result.clear();
    logger.info("获取订阅数据至写入文件共耗时>>>>>>>>>>>>>>>>>" + (System.currentTimeMillis() - start) + "毫秒");
    logger.info("###########################shardId=" + shardId + "###########################");
    return System.currentTimeMillis();
}
产出数据文件代码
/**
 * Writes the given lines to a new GBK-encoded text file.
 *
 * <p>The file is named {@code fileName + "_" + timestamp + ".txt"}, where the timestamp comes
 * from {@code DateUtils.getCurrentTime_yyyyMMddHHmmssSSS()}, and is created inside
 * {@code filePath}. Missing directories are created. Nothing is written when {@code result}
 * is {@code null} or empty, or when {@code fileName} is empty.
 *
 * <p>This method is not synchronized.
 * NOTE(review): the file name is only as unique as the timestamp, so concurrent callers
 * using the same {@code fileName} could presumably target the same file — confirm.
 *
 * @param result   lines to write, one list element per output line
 * @param fileName prefix of the output file name
 * @param filePath directory that receives the file
 * @return {@code true} only if every line was written and the file was closed without error;
 *         {@code false} if there was nothing to write or any exception occurred
 */
public static boolean writeText(List<String> result, String fileName, String filePath) {
    if (result == null || result.isEmpty() || !StringUtils.isNotEmpty(fileName)) {
        return false;
    }
    try {
        String timestampedName = fileName + "_" + DateUtils.getCurrentTime_yyyyMMddHHmmssSSS() + ".txt";
        File pathFile = new File(filePath);
        if (!pathFile.exists()) {
            // mkdirs also creates missing parent directories.
            pathFile.mkdirs();
        }
        File file = new File(pathFile, timestampedName);
        // try-with-resources closes (and thereby flushes) the writer even when a write fails;
        // a failure while closing is reported as an unsuccessful write.
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), "GBK"))) {
            for (String info : result) {
                out.write(info);
                out.newLine();
            }
        }
        return true;
    } catch (Exception e) {
        // Failures are reported through the return value rather than propagated to the caller.
        e.printStackTrace();
        return false;
    }
}
程序运行起来后,发现生成的文件里有些数据是不对的:有些行多了几个字段,有些行少了几个字段。
后来查了很多资料才知道:多个线程同时调用同一个方法、操作同一份共享资源(这里是写文件)时,为了保证数据的一致性,需要给这个共同访问的方法加同步锁,这一点很重要!
于是就把生成文件的代码修改成了:
/**
 * Writes the given lines to a new GBK-encoded text file, one caller at a time.
 *
 * <p>The method is {@code static synchronized}, so it locks on the declaring class and at
 * most one thread executes it at any moment.
 *
 * <p>The file is named {@code fileName + "_" + timestamp + ".txt"}, where the timestamp comes
 * from {@code DateUtils.getCurrentTime_yyyyMMddHHmmssSSS()}, and is created inside
 * {@code filePath}. Missing directories are created. Nothing is written when {@code result}
 * is {@code null} or empty, or when {@code fileName} is empty.
 *
 * @param result   lines to write, one list element per output line
 * @param fileName prefix of the output file name
 * @param filePath directory that receives the file
 * @return {@code true} only if every line was written and the file was closed without error;
 *         {@code false} if there was nothing to write or any exception occurred
 */
public static synchronized boolean writeText(List<String> result, String fileName, String filePath) {
    if (result == null || result.isEmpty() || !StringUtils.isNotEmpty(fileName)) {
        return false;
    }
    try {
        String timestampedName = fileName + "_" + DateUtils.getCurrentTime_yyyyMMddHHmmssSSS() + ".txt";
        File pathFile = new File(filePath);
        if (!pathFile.exists()) {
            // mkdirs also creates missing parent directories.
            pathFile.mkdirs();
        }
        File file = new File(pathFile, timestampedName);
        // try-with-resources closes (and thereby flushes) the writer even when a write fails;
        // a failure while closing is reported as an unsuccessful write.
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), "GBK"))) {
            for (String info : result) {
                out.write(info);
                out.newLine();
            }
        }
        return true;
    } catch (Exception e) {
        // Failures are reported through the return value rather than propagated to the caller.
        e.printStackTrace();
        return false;
    }
}
然后问题就解决了。