成功安装jdk及maven。
目前YCSB没有实现Clickhouse客户端,因此需要我们自己实现。
添加新database客户端的wiki:Adding a Database
git clone https://github.com/brianfrankcooper/YCSB.git
或者直接下载压缩包解压。
项目根目录右键添加module。
ArtifactId为待测试客户端名称,这里为clickhouse-binding
。
模块创建完成后,可以在根项目的pom文件中看到新加模块。
添加相关依赖。
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0modelVersion>
<parent>
<groupId>site.ycsbgroupId>
<artifactId>binding-parentartifactId>
<version>0.18.0-SNAPSHOTversion>
<relativePath>../binding-parentrelativePath>
parent>
<artifactId>clickhouse-bindingartifactId>
<name>Clickhouse Bindingname>
<packaging>jarpackaging>
<dependencies>
<dependency>
<groupId>ru.yandex.clickhousegroupId>
<artifactId>clickhouse-jdbcartifactId>
<version>0.1.52version>
dependency>
<dependency>
<groupId>site.ycsbgroupId>
<artifactId>coreartifactId>
<version>${project.version}version>
<scope>providedscope>
dependency>
<dependency>
<groupId>org.slf4jgroupId>
<artifactId>slf4j-apiartifactId>
<version>1.7.13version>
dependency>
<dependency>
<groupId>org.slf4jgroupId>
<artifactId>slf4j-simpleartifactId>
<version>1.7.13version>
dependency>
<dependency>
<groupId>junitgroupId>
<artifactId>junitartifactId>
<version>4.12version>
<scope>testscope>
dependency>
dependencies>
project>
客户端实现,由于这里我只需要测试查询性能,且在现有数据集上测试,因此只实现了scan()
方法。另外,我测试的sql只有一个返回值,因此只用了一个key
表示,若需多个返回值,可以通过传入字段数组的方式对其进行修改。
package site.ycsb.db.clickhouse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHouseDataSource;
import ru.yandex.clickhouse.settings.ClickHouseProperties;
import site.ycsb.ByteIterator;
import site.ycsb.DBException;
import site.ycsb.Status;
import site.ycsb.StringByteIterator;
import java.sql.*;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Clickhouse client for YCSB framework.
*/
public class ClickhouseClient extends site.ycsb.DB {
private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseClient.class);
/** Count the number of times initialized to teardown on the last. */
private static final AtomicInteger INIT_COUNT = new AtomicInteger(0);
private static ClickHouseConnection connection;
// clickhouse url
private static final String CONNECTION_URL = "clickhouse.url";
// clickhouse port
private static final String CONNECTION_PORT = "clickhouse.port";
// clickhouse database name
private static final String CONNECTION_DB = "clickhouse.db";
// clickhouse username
private static final String CONNECTION_USER = "clickhouse.username";
// clickhouse password
private static final String CONNECTION_PASSWORD = "clickhouse.password";
// sql for test
private static final String SCAN_SQL = "clickhouse.scan_sql";
// sql result
private static final String RES_KEY = "clickhouse.key";
private static final String DEFAULT_PROP = "";
private String sql;
private String key;
private PreparedStatement statement;
@Override
public void init() throws DBException {
INIT_COUNT.incrementAndGet();
synchronized (ClickhouseClient.class) {
Properties props = getProperties();
String url = props.getProperty(CONNECTION_URL, DEFAULT_PROP);
String port = props.getProperty(CONNECTION_PORT, DEFAULT_PROP);
String db = props.getProperty(CONNECTION_DB, DEFAULT_PROP);
String username = props.getProperty(CONNECTION_USER, DEFAULT_PROP);
String password = props.getProperty(CONNECTION_PASSWORD, DEFAULT_PROP);
sql = props.getProperty(SCAN_SQL, DEFAULT_PROP);
key = props.getProperty(RES_KEY, DEFAULT_PROP);
ClickHouseProperties properties = new ClickHouseProperties();
properties.setUser(username);
properties.setPassword(password);
properties.setDatabase(db);
properties.setSocketTimeout(60000000);
ClickHouseDataSource clickHouseDataSource = new ClickHouseDataSource("jdbc:clickhouse://" + url + ":" + port, properties);
try {
connection = clickHouseDataSource.getConnection();
System.out.println(connection);
statement = connection.prepareStatement(sql);
} catch (SQLException e) {
LOGGER.error(e.getMessage());
}
}
}
@Override
public void cleanup() throws DBException {
if (INIT_COUNT.decrementAndGet() == 0) {
try {
if (!connection.getAutoCommit()){
connection.commit();
}
connection.close();
} catch (SQLException e) {
System.err.println("Error in cleanup execution. " + e);
}
}
}
@Override
public Status read(String table, String key, Set<String> fields, Map<String, ByteIterator> result) {
return null;
}
@Override
public Status scan(String table, String startkey, int recordcount, Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
try {
ResultSet resultSet = statement.executeQuery();
if (!resultSet.next()) {
resultSet.close();
return Status.NOT_FOUND;
}
HashMap<String, ByteIterator> values = new HashMap<String, ByteIterator>();
Integer value = resultSet.getInt(key);
values.put(key, new StringByteIterator(value.toString()));
result.add(values);
resultSet.close();
} catch (SQLException e) {
LOGGER.error(e.getMessage());
return Status.ERROR;
}
return Status.OK;
}
@Override
public Status update(String table, String key, Map<String, ByteIterator> values) {
return null;
}
@Override
public Status insert(String table, String key, Map<String, ByteIterator> values) {
return null;
}
@Override
public Status delete(String table, String key) {
return null;
}
}
实现单测。
package site.ycsb.db.clickhouse;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHouseDataSource;
import ru.yandex.clickhouse.settings.ClickHouseProperties;
import site.ycsb.ByteIterator;
import site.ycsb.DBException;
import java.io.IOException;
import java.net.Socket;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Properties;
import java.util.Vector;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.Assert.assertThat;
import static org.junit.Assume.assumeNoException;
public class ClickhouseClientTest {
private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseClientTest.class);
private static ClickhouseClient clickhouseClient;
private static ClickHouseConnection connection;
private static final String CONNECTION_URL = "clickhouse.url";
// your clickhouse ip
private static final String CONNECTION_URL_INPUT = "your clickhouse ip";
private static final String CONNECTION_PORT = "clickhouse.port";
// your clickhouse port
private static final String CONNECTION_PORT_INPUT = "your clickhouse port";
private static final String CONNECTION_DB = "clickhouse.db";
// your database name
private static final String CONNECTION_DB_INPUT = "database name";
private static final String CONNECTION_USER = "clickhouse.username";
// your username
private static final String CONNECTION_USER_INPUT = "your username";
private static final String CONNECTION_PASSWORD = "clickhouse.password";
// your password
private static final String CONNECTION_PASSWORD_INPUT = "your password";
private static final String SCAN_SQL = "clickhouse.scan_sql";
// your test sql
private static final String SCAN_SQL_INPUT = "select uniqExact(user_id) as count from tb";
private static final String RES_KEY = "clickhouse.key";
// the field returned by sql
private static final String RES_KEY_INPUT = "count";
@BeforeClass
public static void setUp() {
try (Socket socket = new Socket(CONNECTION_URL_INPUT, Integer.valueOf(CONNECTION_PORT_INPUT))){
assertThat("Socket is not bound.", socket.getLocalPort(), not(-1));
} catch (IOException connectFailed) {
assumeNoException("PostgreSQL is not running. Skipping tests.", connectFailed);
}
Properties props = new Properties();
props.setProperty(CONNECTION_URL, CONNECTION_URL_INPUT);
props.setProperty(CONNECTION_PORT, CONNECTION_PORT_INPUT);
props.setProperty(CONNECTION_DB, CONNECTION_DB_INPUT);
props.setProperty(CONNECTION_USER, CONNECTION_USER_INPUT);
props.setProperty(CONNECTION_PASSWORD, CONNECTION_PASSWORD_INPUT);
props.setProperty(SCAN_SQL, SCAN_SQL_INPUT);
props.setProperty(RES_KEY, RES_KEY_INPUT);
try{
ClickHouseProperties properties = new ClickHouseProperties();
properties.setUser(CONNECTION_USER_INPUT);
properties.setPassword(CONNECTION_PASSWORD_INPUT);
properties.setDatabase(CONNECTION_DB_INPUT);
properties.setSocketTimeout(600000);
ClickHouseDataSource clickHouseDataSource = new ClickHouseDataSource("jdbc:clickhouse://" + CONNECTION_URL_INPUT + ":" + CONNECTION_PORT_INPUT, properties);
connection = clickHouseDataSource.getConnection();
boolean tableExists = connection.getMetaData().getTables(null, null, "tb_user_action_3", null).next();
assertThat("Table does not exist.", tableExists, not(false));
clickhouseClient = new ClickhouseClient();
clickhouseClient.setProperties(props);
clickhouseClient.init();
} catch (SQLException | DBException e){
LOGGER.error(e.toString());
}
}
@Test
public void scanTest() {
Vector<HashMap<String, ByteIterator>> results = new Vector<>();
clickhouseClient.scan("", "", 1, null, results);
LOGGER.info(results.toString());
}
}
YCSB的bin
目录下事先准备好了执行脚本,但为了可以正确使用它,需要做一些准备。
首先修改bin
目录下bindings.properties
文件,添加clickhouse标识,使脚本可以识别clickhouse,并映射到clickhouse的客户端。
如果使用python脚本ycsb
,则还需修改脚本,在DATABASES
字典中添加clickhouse的映射。
为了使测试方便,我还在脚本中添加了打包时跳过代码风格检查的flag。
别的脚本同理,修改mvn
命令即可。
如果嫌麻烦,可以直接在项目根目录下:
mvn clean
mvn install
也可以单独给模块打包,在core
、binding-parent
等自定义数据库客户端依赖的包目录以及clickhouse
包目录下执行:
mvn clean
mvn install
在workloads
目录下复制一份配置文件模板。
cp workload_template workload_clickhouse
...
# The name of the workload class to use
workload=site.ycsb.workloads.CoreWorkload
# The inputs of clickhouse
# 完善信息
clickhouse.url=
clickhouse.port=
clickhouse.db=
clickhouse.username=
clickhouse.password=
clickhouse.scan_sql=
clickhouse.key=
# There is no default setting for operationcount but it is
# required to be set.
# The number of operations to use during the run phase.
# 总操作数
operationcount=30
...
# What proportion of operations are reads
readproportion=0
# What proportion of operations are updates
updateproportion=0
# What proportion of operations are inserts
insertproportion=0
# What proportion of operations read then modify a record
readmodifywriteproportion=0
# scan操作占比100%
# What proportion of operations are scans
scanproportion=1
...
在项目根目录下:
./bin/ycsb run clickhouse -threads 2 -target 10 -P ./workloads/workload_clickhouse