在原水力发电厂在线监测VC++6.0软件中实现kafka C++客户端,将原始波形信号及计算指标点通过avro序列化后以消息形式发送给大数据平台侧kafka集群。
C++搭建kafka客户端用了我半个月的时间,avro序列化用了半个月的时间。相信好多人也是苦于没有系统性的参考耽误了好多工夫,我写这篇文档希望能给别人一些辅助。
本地装了一套kafka的环境:
序号 | 名称 | 备注 | 下载链接 |
1 | JDK | Java开发环境 | https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html |
2 |
Zookeeper |
分布式应用程序协调服务 |
http://mirror.bit.edu.cn/apache/zookeeper/ |
3 |
Kafka |
Kafka开发环境 |
http://kafka.apache.org/downloads.html |
dataDir=D:\data\logs\zookeeper
dataLogDir=D:\data\logs\zookeeper (若原配置文件中无此行则将此行写在dataDir下面)
修改kafka服务端配置文件:打开D:\kafka_2.12-2.2.0\config\server.properties。将log.dirs的值改成 log.dirs=D:\data\logs\kafka
使用testkafka.cpp,加载librdkafka动态链接库。代码大致如下:
#include "pch.h"
#include <stdio.h>
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include "..\..\rdkafka\rdkafka.h"
#pragma comment(lib, "librdkafka.lib")
/* Produce-loop flag: cleared by the SIGINT handler to request shutdown. */
static int run = 1;

/* SIGINT handler: ask the main loop to stop and unblock the pending
 * fgets() by closing stdin.
 * NOTE(review): fclose() is not on the async-signal-safe list — kept as-is
 * because this mirrors the librdkafka example, but worth confirming. */
static void stop(int sig)
{
    run = 0;
    fclose(stdin); /* abort the blocking fgets() in main() */
}
/* Per-message delivery report callback, invoked from rd_kafka_poll().
 * Logs either the delivery error or the delivered size/partition.
 * @param rk        producer handle (unused here)
 * @param rkmessage the delivered (or failed) message
 * @param opaque    application opaque pointer (unused) */
static void dr_msg_cb(rd_kafka_t *rk,
                      const rd_kafka_message_t *rkmessage, void *opaque) {
    if (rkmessage->err)
        fprintf(stderr, "%% Message delivery failed: %s\n",
                rd_kafka_err2str(rkmessage->err));
    else
        fprintf(stderr,
                /* BUG FIX: rkmessage->len is size_t, so the correct
                 * printf conversion is %zu, not %zd (ssize_t). */
                "%% Message delivered (%zu bytes, "
                "partition %d)\n",
                rkmessage->len, rkmessage->partition);
}
int main(int argc, char **argv) {
rd_kafka_t *rk; /* Producer instance handle */
rd_kafka_topic_t *rkt; /* Topic object */
rd_kafka_conf_t *conf; /* Temporary configuration object */
char errstr[512]; /* librdkafka API error reporting buffer */
char buf[512]; /* Message value temporary buffer */
const char *brokers; /* Argument: broker list */
const char *topic; /* Argument: topic to produce to */
if (argc != 3) {
fprintf(stderr, "%% Usage: %s \n", argv[0]);
return 1;
}
brokers = argv[1];
topic = argv[2];
conf = rd_kafka_conf_new();
if (rd_kafka_conf_set(conf, "bootstrap.servers", brokers,
errstr, sizeof(errstr)) != RD_KAFKA_CONF_OK) {
fprintf(stderr, "%s\n", errstr);
return 1;
}
rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb);
rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr));
if (!rk) {
fprintf(stderr,
"%% Failed to create new producer: %s\n", errstr);
return 1;
}
rkt = rd_kafka_topic_new(rk, topic, NULL);
if (!rkt) {
fprintf(stderr, "%% Failed to create topic object: %s\n",
rd_kafka_err2str(rd_kafka_last_error()));
rd_kafka_destroy(rk);
return 1;
}
signal(SIGINT, stop);
fprintf(stderr,
"%% Type some text and hit enter to produce message\n"
"%% Or just hit enter to only serve delivery reports\n"
"%% Press Ctrl-C or Ctrl-D to exit\n");
while (run && fgets(buf, sizeof(buf), stdin)) {
size_t len = strlen(buf);
if (buf[len - 1] == '\n') /* Remove newline */
buf[--len] = '\0';
if (len == 0) {
/* Empty line: only serve delivery reports */
rd_kafka_poll(rk, 0/*non-blocking */);
continue;
}
retry:
if (rd_kafka_produce(
rkt,
RD_KAFKA_PARTITION_UA,
RD_KAFKA_MSG_F_COPY,
buf, len,
NULL, 0,
NULL) == -1) {
fprintf(stderr,
"%% Failed to produce to topic %s: %s\n",
rd_kafka_topic_name(rkt),
rd_kafka_err2str(rd_kafka_last_error()));
if (rd_kafka_last_error() ==
RD_KAFKA_RESP_ERR__QUEUE_FULL) {
rd_kafka_poll(rk, 1000/*block for max 1000ms*/);
goto retry;
}
}
else {
fprintf(stderr, "%% Enqueued message (%zd bytes) "
"for topic %s\n",
len, rd_kafka_topic_name(rkt));
}
rd_kafka_poll(rk, 0/*non-blocking*/);
}
fprintf(stderr, "%% Flushing final messages..\n");
rd_kafka_flush(rk, 10 * 1000 /* wait for max 10 seconds */);
rd_kafka_topic_destroy(rkt);
rd_kafka_destroy(rk);
return 0;
}
测试的时候可能出现的问题:
- broker may not available:这是因为kafka-server-start.bat没执行成功。
- Could not reserve enough space for 1048576KB object heap(JVM无法预留足够的堆空间):
找到JDK路径下的bin下运行jvisualvm.exe。 查看JDK堆栈最大空间。例本机256M
kafka路径下\bin\windows\kafka-server-start.bat 中修改 64位系统堆栈空间修改为:KAFKA_HEAP_OPTS=-Xmx256M -Xms128M;
重新执行kafka-server-start.bat;
至此kafka环境搭建好了。下一步搭建avro C++环境。avro的编译环境比较费劲,编译了一周多最后还是我们老大任总搞定的。
装avro C++可以参考官方手册:https://avro.apache.org/docs/current/api/cpp/html/index.html
先装CMAKE,我用的编译器是CMAKE版本是3.14.4;
装BOOST库,我用的是BOOST版本是1.70.0,参数中选用VS2017对应版本的编译器,静态链接,用boost库编译出来几个lib,分别为:boost_filesystem.lib、boost_iostreams.lib、boost_program_options.lib、boost_system.lib。正常编译完boost以后库的名称不是这个,自己根据自己要用debug版还是release版还是,然后库用的静态链接还是动态链接。
用cmake打开avro C++的原代码生成一个VS2017的工程文件,然后用VS2017编译生成avro-cpp.dll。(中途有些麻烦,有些地方是我们老大帮我弄的,主要是boost库的配置和zlib的库)
我这个项目的需求是,大数据平台有一套avro 的消息格式,这个格式叫做schema,是一个形式为JSON字符串的一个avro格式。
需要将数据按此schema序列化后通过kafka客户端发送给大数据平台。在avro-cpp.sln的解决方案里有一个叫做avrogencpp的工程,把这个工程编译生成一个exe,然后在命令行输入命令:avrogencpp -i XXX.JSON -o XXX.h -n c
会生成一个头文件,需要把数据存进这个头文件中包含的类中然后把数据序列化发送。
但是C++版本的avro与java的有点区别,java序列化之后数据前面会有一个头部信息,头部信息包含三组信息:
"O"、"b"、"j"、0x01 四个字节;
JSON的schema;
同步位;
avro C++直接序列化到内存时不会附带这个头部信息,但用现成的API把数据写成Avro序列化文件(object container file)时会自动包含头部信息。利用这一点就能实现C++版客户端发送avro序列化消息的全部功能了。
代码奉上:.h
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GMDATA_H_487324861__H_
#define GMDATA_H_487324861__H_

/* BUG FIX: the header names and template arguments below had been
 * stripped (HTML-escaped angle brackets lost); restored to the standard
 * avrogencpp output shape. */
#include <sstream>
#include "boost/any.hpp"
#include "avro/Specific.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"

namespace c {

/* Avro union [float, string]: exactly one branch is active, tracked by
 * idx_ (0 = float, 1 = string); the value lives in a boost::any. */
struct GMData_json_Union__0__ {
private:
    size_t idx_;
    boost::any value_;
public:
    size_t idx() const { return idx_; }
    float get_float() const;
    void set_float(const float& v);
    std::string get_string() const;
    void set_string(const std::string& v);
    GMData_json_Union__0__();
};

/* One timestamped sample: epoch time plus a float-or-string value. */
struct RtValue {
    typedef GMData_json_Union__0__ value_t;
    int64_t time;
    value_t value;
    RtValue() :
        time(int64_t()),
        value(value_t())
    { }
};

/* One measurement point: its code plus the list of samples. */
struct RtData {
    std::string code;
    std::vector<RtValue> data;
    RtData() :
        code(std::string()),
        data(std::vector<RtValue>())
    { }
};

inline
float GMData_json_Union__0__::get_float() const {
    if (idx_ != 0) {
        throw avro::Exception("Invalid type for union");
    }
    return boost::any_cast<float>(value_);
}

inline
void GMData_json_Union__0__::set_float(const float& v) {
    idx_ = 0;
    value_ = v;
}

inline
std::string GMData_json_Union__0__::get_string() const {
    if (idx_ != 1) {
        throw avro::Exception("Invalid type for union");
    }
    return boost::any_cast<std::string>(value_);
}

inline
void GMData_json_Union__0__::set_string(const std::string& v) {
    idx_ = 1;
    value_ = v;
}

/* Default-constructed union holds the float branch (index 0). */
inline GMData_json_Union__0__::GMData_json_Union__0__() : idx_(0), value_(float()) { }

} // namespace c
namespace avro {
template<> struct codec_traits {
static void encode(Encoder& e, c::GMData_json_Union__0__ v) {
e.encodeUnionIndex(v.idx());
switch (v.idx()) {
case 0:
avro::encode(e, v.get_float());
break;
case 1:
avro::encode(e, v.get_string());
break;
}
}
static void decode(Decoder& d, c::GMData_json_Union__0__& v) {
size_t n = d.decodeUnionIndex();
if (n >= 2) { throw avro::Exception("Union index too big"); }
switch (n) {
case 0:
{
float vv;
avro::decode(d, vv);
v.set_float(vv);
}
break;
case 1:
{
std::string vv;
avro::decode(d, vv);
v.set_string(vv);
}
break;
}
}
};
template<> struct codec_traits {
static void encode(Encoder& e, const c::RtValue& v) {
avro::encode(e, v.time);
avro::encode(e, v.value);
}
static void decode(Decoder& d, c::RtValue& v) {
if (avro::ResolvingDecoder *rd =
dynamic_cast(&d)) {
const std::vector fo = rd->fieldOrder();
for (std::vector::const_iterator it = fo.begin();
it != fo.end(); ++it) {
switch (*it) {
case 0:
avro::decode(d, v.time);
break;
case 1:
avro::decode(d, v.value);
break;
default:
break;
}
}
} else {
avro::decode(d, v.time);
avro::decode(d, v.value);
}
}
};
template<> struct codec_traits {
static void encode(Encoder& e, const c::RtData& v) {
avro::encode(e, v.code);
avro::encode(e, v.data);
}
static void decode(Decoder& d, c::RtData& v) {
if (avro::ResolvingDecoder *rd =
dynamic_cast(&d)) {
const std::vector fo = rd->fieldOrder();
for (std::vector::const_iterator it = fo.begin();
it != fo.end(); ++it) {
switch (*it) {
case 0:
avro::decode(d, v.code);
break;
case 1:
avro::decode(d, v.data);
break;
default:
break;
}
}
} else {
avro::decode(d, v.code);
avro::decode(d, v.data);
}
}
};
}
#endif
.cpp
int GKafkaPushFeild(const char *BrokersName, const char *TopicName, unsigned char *Data, int Size, int FeildNum, char *Return)
{
if ((BrokersName == NULL) || (TopicName == NULL) || (Data == NULL))
{
return -1;
}
int l_iRetCode = 0;
int l_iIndex = 0;
std::vector l_sSchema;
c::RtValue l_value;
std::vector l_RtData;
__int64 l_lTime = 0;
float l_fTemp = 0.0F;
int l_iFeildNum = FeildNum;
unsigned char *l_cpTempData = Data;
unsigned char *l_cStartData = Data;
char l_cTempCode[CODENUM + 1] = { 0 };
memcpy(&l_lTime, l_cpTempData, 8);
l_cpTempData += 8;
char filename[1024] = "tempData.avro";
remove(filename);
avro::ValidSchema writerSchema(makeValidSchema(sch));
typedef std::pair Pair;
avro::DataFileWriter df(filename, writerSchema,100);
for (int i = 0; i < l_iFeildNum; i++)
{
c::RtData l_cRtData;
c::RtValue l_cRtValue;
memcpy(l_cTempCode, l_cpTempData, CODENUM);
l_cTempCode[CODENUM - 1] = '\0';
l_cpTempData += CODENUM;
memcpy(&l_fTemp, l_cpTempData, DATANUM);
l_cpTempData += DATANUM;
l_cRtData.code = l_cTempCode;
//memcpy(l_RtData[l_iIndex].code.c_str,l_cTempCode,CODENUM);
//l_RtData[l_iIndex].code = l_cTempCode;
l_cRtValue.time = l_lTime;
l_cRtValue.value.set_float(l_fTemp);
l_cRtData.data.push_back(l_cRtValue);
l_RtData.push_back(l_cRtData);
//Pair l_pair(writerSchema, l_cRtData);
df.write(l_cRtData);
}
df.close();
FILE *l_fp = NULL;
char l_char = 'a';
int l_iReadNum = 0;
std::string strContent;
errno_t l_err = fopen_s(&l_fp, filename, "rb");
if (l_err < 0)
{
return -1;
}
else
{
fseek(l_fp, 0, SEEK_END);
l_iReadNum = ftell(l_fp);
if (0 == l_iReadNum)
{
l_iReadNum = 0;
strContent = "";
fclose(l_fp);
return 1;
}
strContent.resize(l_iReadNum);
fseek(l_fp, 0, SEEK_SET);
fread((char*)&strContent[0], l_iReadNum, 1, l_fp);
}
fclose(l_fp);
remove(filename);
return 0;
}
从工程里面粘出来的,需要自己手动改一下生成一个动态链接库给VC6.0调用。希望对你有帮助!