Our project needed Storm for distributed computation and data processing; Storm's principles and general background are not covered here.
The topology in this project has two layers of bolts.
First, write the topology:
// Imports below assume the Storm 1.x package layout (org.apache.storm.*) and the old storm-kafka spout.
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;

public class HomeBandToplogy {
    private static final String TOPOLOGY_NAME = "HomeBandToplogy";
    private static final String KAFKA_SPOUT = "kafkaSpout";
    private static final String KAFKA_BOLT = "kafkaBolt";
    private static final String ANYNASIS_BOLT = "AnynasisBolt";
    private static final Log log = LogFactory.getLog(HomeBandToplogy.class);

    public static void main(String[] args) throws AuthorizationException {
        PropertyUtil property = null;
        // Pre-create the HBase buffer tables used by the bolts.
        try {
            HBase.createTable(BaseFunction.ods_ott_userbehavior_buffer, new String[]{BaseFunction.ods_ott_userbehavior_buffer_family}, false);
            HBase.createTable(BaseFunction.ods_ott_deviceinfo_buffer, new String[]{BaseFunction.ods_ott_deviceinfo_buffer_family}, false);
            HBase.createTable(BaseFunction.ods_ott_videoinformation_buffer, new String[]{BaseFunction.ods_ott_videoinformation_buffer_family}, false);
            HBase.createTable(BaseFunction.ods_ott_videoinfo_recommend_buffer, new String[]{BaseFunction.ods_ott_videoinfo_recommend_buffer_family}, false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        property = PropertyUtil.getInstance();
        String zks = property.getString("kafka.zookeeper.server", "");            // e.g. master:2181,node1:2181,node2:2181
        String topic = property.getString("kafka.zookeeper.topic", "rawMessage"); // Kafka topic to consume
        String zkRoot = property.getString("kafka.zookeeper.zkRoot", "/storm-kafka"); // ZooKeeper root path where the spout stores its offsets
        String id = property.getString("kafka.zookeeper.id", "stormKafka");       // consumer id we define for this spout
        BrokerHosts brokerHosts = new ZkHosts(zks);
        SpoutConfig spoutConf = new SpoutConfig(brokerHosts, topic, zkRoot, id);
        spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
        spoutConf.ignoreZkOffsets = false;
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConf);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(KAFKA_SPOUT, kafkaSpout, 1);
        builder.setBolt(KAFKA_BOLT, new KafkaBolt(), 16).shuffleGrouping(KAFKA_SPOUT);
        builder.setBolt(ANYNASIS_BOLT, new AnynasisBolt(), 72).shuffleGrouping(KAFKA_BOLT);

        Config config = new Config();
        config.setDebug(false);
        config.setNumWorkers(16);
        final int pendingnum = property.getInt("pendingnum", 0);
        config.setMaxSpoutPending(pendingnum); // maximum number of pending (un-acked) tuples per spout task

        if (args != null && args.length > 0) {
            try {
                StormSubmitter.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        } else {
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
        }
    }
}
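The topology reads its settings through a project-specific PropertyUtil helper that is not shown in this post. Below is a minimal sketch of what such a Properties-backed singleton could look like; the class layout and the config.properties file name are assumptions, only the getInstance()/getString()/getInt() calls are taken from the code above:
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

// Hypothetical sketch of the PropertyUtil helper referenced by the topology.
public class PropertyUtil {
    private static final PropertyUtil INSTANCE = new PropertyUtil();
    private final Properties props = new Properties();

    private PropertyUtil() {
        // "config.properties" is an assumed file name on the classpath.
        try (InputStream in = PropertyUtil.class.getClassLoader().getResourceAsStream("config.properties")) {
            if (in != null) {
                props.load(in);
            }
        } catch (IOException e) {
            throw new RuntimeException("failed to load config.properties", e);
        }
    }

    public static PropertyUtil getInstance() {
        return INSTANCE;
    }

    public String getString(String key, String defaultValue) {
        return props.getProperty(key, defaultValue);
    }

    public int getInt(String key, int defaultValue) {
        String v = props.getProperty(key);
        return v == null ? defaultValue : Integer.parseInt(v.trim());
    }
}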
Next, write a KafkaBolt and an AnynasisBolt, as follows:
KafkaBolt:
public class KafkaBolt extends BaseRichBolt {
    OutputCollector collector;
    Log logger;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        logger = LogFactory.getLog(KafkaBolt.class);
    }

    @Override
    public void execute(Tuple input) {
        String line = "";
        try {
            // Each Kafka message is expected to contain three fields separated by "#KY#": id, code and payload.
            line = input.getString(0);
            final String[] temp = line.split("#KY#");
            if (temp.length != 3) {
                logger.error("thread id " + Thread.currentThread().getId() + " kafkaBolt invalid data " + line);
            } else {
                String id = temp[0];
                final int code = Integer.valueOf(temp[1]);
                final String value = temp[2];
                // Build the concrete service object for this message type and pass it downstream.
                AbstractBoxService info = BoxFactory.getBoxInstance(id, MobileBoxCode.valueOf(code), value);
                if (info != null) {
                    collector.emit(input, new Values(id, info));
                }
            }
        } catch (Exception e) {
            logger.error("parse error: " + line + ", the cause: " + e.getMessage());
        } finally {
            collector.ack(input);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("id", "info"));
    }
}
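For reference, the raw message format that KafkaBolt assumes can be checked with a tiny standalone example; the concrete values here are hypothetical:
// Small standalone check of the "#KY#" message format assumed by KafkaBolt (values are hypothetical).
public class MessageFormatExample {
    public static void main(String[] args) {
        String line = "D0000123#KY#101#KY#{\"softVersion\":\"V2.1.0\"}";
        String[] parts = line.split("#KY#");
        // parts[0] = device id, parts[1] = numeric message code, parts[2] = payload
        System.out.println(parts[0] + " / " + parts[1] + " / " + parts[2]);
    }
}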
AnynasisBolt:
public class AnynasisBolt extends BaseRichBolt {
    private OutputCollector collector;
    Log logger;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        logger = LogFactory.getLog(AnynasisBolt.class);
    }

    @Override
    public void execute(Tuple input) {
        try {
            // The incoming tuple is a 2-element list: (id, info).
            final String deviceid = input.getString(0);
            AbstractBoxService info = (AbstractBoxService) input.getValues().get(1);
            boolean status = true;
            boolean result = info.executeRedis();
            if (!result) status = false;
            result = info.executeHbase();
            if (!result) status = false;
            if (!status) { // log if either the Redis or the HBase write failed
                logger.error("AnynasisBolt error. the id:" + info.getId() + ", the value:" + info.getValue() + ", the code:" + info.getCode());
            }
            collector.ack(input);
            System.out.println("AnynasisBolt.execute end...");
        } catch (Exception e) {
            collector.fail(input);
            System.out.println("error....");
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Last stage of the topology; emits nothing.
    }
}
The factory class:
public class BoxFactory {
    static Log logger = LogFactory.getLog(BoxFactory.class);

    public static AbstractBoxService getBoxInstance(String deviceid, MobileBoxCode code, String value) {
        AbstractBoxService info = null;
        switch (code) {
            // region: device info
            case Device_UIXml:
                info = new Device_UIXml(deviceid, code, value);
                break;
            case Device_BootTimeDelay:
                info = new Device_BootTimeDelay(deviceid, code, value);
                break;
            case Device_BaseInfo:
                info = new Device_BaseInfo(deviceid, code, value);
                break;
            case Device_ID:
                info = new Device_ID(deviceid, code, value);
                break;
            case Device_APKVerSion:
                info = new Device_APKVerSion(deviceid, code, value);
                break;
            case Device_ConnectionMode:
                info = new Device_ConnectionMode(deviceid, code, value);
                break;
            case Device_LicencesName:
                info = new Device_LicencesName(deviceid, code, value);
                break;
            case Device_SoftVersion:
                info = new Device_SoftVersion(deviceid, code, value);
                break;
            case Device_SupplierName:
                info = new Device_SupplierName(deviceid, code, value);
                break;
            case Device_TerminalID:
                info = new Device_TerminalID(deviceid, code, value);
                break;
            case Device_TerminalMode:
                info = new Device_TerminalMode(deviceid, code, value);
                break;
            // region: user behavior
            case UserBehavior_Search:
                info = new UserBehavior_Search(deviceid, code, value);
                break;
            case UserBehavior_PosterOpenTimes:
                info = new UserBehavior_PosterOpenTimes(deviceid, code, value);
                break;
            case UserBehavior_ChannelSwitchWaitTimes:
                info = new UserBehavior_ChannelSwitchWaitTimes(deviceid, code, value);
                break;
            case UserBehavior_OpenApp:
                info = new UserBehavior_OpenApp(deviceid, code, value);
                break;
            case UserBehavior_In:
                info = new UserBehavior_In(deviceid, code, value);
                break;
            case UserBehavior_TopIn:
                info = new UserBehavior_TopIn(deviceid, code, value);
                break;
            // region: video playback
            case VideoPlay_Start:
                info = new VideoPlay_Start(deviceid, code, value);
                break;
            case VideoPlay_End:
                info = new VideoPlay_End(deviceid, code, value);
                break;
            case VideoPlay_Error:
                info = new VideoPlay_Error(deviceid, code, value);
                break;
            case VideoPlay_FirstFrameWaitTimes:
                info = new VideoPlay_FirstFrameWaitTimes(deviceid, code, value);
                break;
            case VideoPlay_Information:
                info = new VideoPlay_Information(deviceid, code, value);
                break;
            case VideoPlay_EPGConfig:
                info = new VideoPlay_EPGConfig(deviceid, code, value);
                break;
            case VideoPlay_KartunTimes:
                info = new VideoPlay_KartunTimes(deviceid, code, value);
                break;
            case VideoPlayRecomend_End:
                info = new VideoPlayRecomend_End(deviceid, code, value);
                break;
            case VideoPlayRecomend_Start:
                info = new VideoPlayRecomend_Start(deviceid, code, value);
                break;
            // endregion
            default:
                break;
        }
        return info;
    }
}
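BoxFactory switches on a MobileBoxCode enum that is not shown in this post, and KafkaBolt resolves it from the numeric field with MobileBoxCode.valueOf(code). Below is a minimal sketch of what such an enum with an int-based lookup might look like; the numeric codes and the reduced set of constants are assumptions:
// Hypothetical sketch of the MobileBoxCode enum; only a few constants are shown and the codes are made up.
public enum MobileBoxCode {
    Device_BaseInfo(101),
    Device_SoftVersion(102),
    UserBehavior_Search(201),
    VideoPlay_Start(301);

    private final int code;

    MobileBoxCode(int code) {
        this.code = code;
    }

    public int getCode() {
        return code;
    }

    // Int-based lookup used by KafkaBolt; the built-in valueOf(String) only accepts constant names,
    // so an overload like this has to be defined by the project itself.
    public static MobileBoxCode valueOf(int code) {
        for (MobileBoxCode c : values()) {
            if (c.code == code) {
                return c;
            }
        }
        throw new IllegalArgumentException("unknown MobileBoxCode: " + code);
    }
}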
The interface:
public interface BoxService extends Serializable {
    /**
     * Write the data to Redis.
     *
     * @return true on success
     */
    public Boolean executeRedis();

    /**
     * Write the data to HBase.
     *
     * @return true on success
     */
    public Boolean executeHbase();
}
The abstract class:
/**
 * Abstract base class from which the concrete box-service instances are built.
 */
public abstract class AbstractBoxService implements BoxService, Serializable {
    private String id;
    private MobileBoxCode code;
    private String value;

    public AbstractBoxService(String id, MobileBoxCode code, String value) {
        this.id = id;
        this.code = code;
        this.value = value;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public MobileBoxCode getCode() {
        return code;
    }

    public void setCode(MobileBoxCode code) {
        this.code = code;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    @Override
    public Boolean executeRedis() {
        return null;
    }

    @Override
    public Boolean executeHbase() {
        return null;
    }

    @Override
    public String toString() {
        return "AbstractBoxService{" +
                "id='" + id + '\'' +
                ", code=" + code +
                ", value='" + value + '\'' +
                '}';
    }
}
The business logic of the concrete classes that extend this abstract class is not described here.
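Purely to illustrate the pattern, one such subclass might look roughly like the sketch below; the body is entirely hypothetical, and RedisUtil and the HBase.put(...) call are assumed helpers rather than the project's real code:
// Hypothetical subclass, only to illustrate the pattern; the real business logic is not shown in this post.
public class Device_SoftVersion extends AbstractBoxService {

    public Device_SoftVersion(String id, MobileBoxCode code, String value) {
        super(id, code, value);
    }

    @Override
    public Boolean executeRedis() {
        // e.g. cache the latest software version per device; RedisUtil is an assumed helper.
        // RedisUtil.set("soft_version:" + getId(), getValue());
        return Boolean.TRUE;
    }

    @Override
    public Boolean executeHbase() {
        // e.g. write a row into the device-info buffer table; HBase.put(...) is an assumed helper.
        // HBase.put(BaseFunction.ods_ott_deviceinfo_buffer, getId(), "soft_version", getValue());
        return Boolean.TRUE;
    }
}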
After the Storm program started, it ran normally on small volumes of data.
When we increased the test data and the volume reached several hundred thousand records, the following exception appeared:
[ERROR] connection attempt 9 to Netty-Client-node5/172.16.1.100:6700 failed: java.net.ConnectException: Connection refused: node5/172.16.1.100:6700
2018-11-16 17:46:11.533 o.a.s.u.StormBoundedExponentialBackoffRetry client-boss-1 [WARN] WILL SLEEP FOR 420ms (MAX)
At the same time, a large number of tuples in the Storm program failed to be acked.
At first we thought the exception was caused by too many threads and by the environment running short of resources.
After a lot of troubleshooting, we removed the interface and turned the methods in the abstract class into abstract methods, after which the program ran normally.
Why did this exception occur?
The reason is that, when processing, Storm only handles the tasks inside the current worker process; the kind of cross-process dispatch this design implied cannot be achieved, which is what produced the failure.
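Based on the fix described above (dropping the interface and making the two persistence methods abstract), the abstract class ended up looking roughly like this; this is a sketch of the change, not the project's exact code:
import java.io.Serializable;

// Sketch of the abstract class after the fix: no BoxService interface,
// and executeRedis()/executeHbase() are abstract so every subclass must implement them.
public abstract class AbstractBoxService implements Serializable {
    private String id;
    private MobileBoxCode code;
    private String value;

    public AbstractBoxService(String id, MobileBoxCode code, String value) {
        this.id = id;
        this.code = code;
        this.value = value;
    }

    /** Write the data to Redis; implemented by each concrete subclass. */
    public abstract Boolean executeRedis();

    /** Write the data to HBase; implemented by each concrete subclass. */
    public abstract Boolean executeHbase();

    public String getId() { return id; }
    public MobileBoxCode getCode() { return code; }
    public String getValue() { return value; }
}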