To steer clear of JanusGraph's compatibility pitfalls, this walkthrough does JanusGraph OLAP development with IDEA on Ubuntu (developing on Windows runs into compatibility problems), using HBase as the storage backend and Spark as the compute engine.
Package versions are as given in the commands below.
Host environment:
Parameter | Value
---|---
ip | 10.0.10.192
hostname | centos_mini
systemctl stop firewalld.service     # stop the firewall
systemctl disable firewalld.service  # keep the firewall from starting at boot
Download and install the Java RPM:
rpm -ivh jdk-8u191-linux-x64.rpm
Download and install the Scala RPM:
rpm -ivh scala-2.11.8.rpm
Set the hostname to match the table above:
hostnamectl set-hostname centos_mini
Unpack Spark and start a standalone master and worker:
tar -zxf spark-2.2.0-bin-hadoop2.7.tgz -C /opt
cd /opt/spark-2.2.0-bin-hadoop2.7
./sbin/start-master.sh --host 10.0.10.192
./sbin/start-slave.sh spark://10.0.10.192:7077
# check that the Master and Worker processes are running
jps
# use the spark shell
./bin/spark-shell --master spark://10.0.10.192:7077
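The spark-shell check above is usually enough; if you also want to verify the cluster from Java (the language used for the OLAP program later), here is a minimal sketch, assuming spark-core 2.2.0 is on the classpath; the class and app names are hypothetical:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Arrays;

// Hypothetical smoke test: counts a tiny RDD on the standalone master started above.
public class SparkSmokeTest {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("smoke-test") // arbitrary name
                .setMaster("spark://10.0.10.192:7077");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            System.out.println(sc.parallelize(Arrays.asList(1, 2, 3, 4)).count()); // prints 4
        }
    }
}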
Unpack HBase for a standalone deployment:
tar -zxf hbase-1.2.6.1-bin.tar.gz -C /opt
cd /opt/hbase-1.2.6.1
vim conf/hbase-site.xml
Add the following configuration:
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>file:/opt/hbase1.2.6Data</value>
    </property>
</configuration>
vim conf/hbase-env.sh
Add:
export JAVA_HOME=/usr/java/latest/
Comment out:
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
Start HBase:
./bin/start-hbase.sh
Test it with the HBase shell:
./bin/hbase shell
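The shell is the quickest check; as an alternative, here is a sketch of the same check from Java using the stock HBase 1.x client API (assumes hbase-client is on the classpath; the class name is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

// Hypothetical smoke test: lists the tables HBase knows about
// (an empty list is normal on a fresh install).
public class HBaseSmokeTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "10.0.10.192"); // standalone HBase runs its own ZooKeeper here
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            for (TableName t : admin.listTableNames()) {
                System.out.println(t.getNameAsString());
            }
        }
    }
}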
Unpack JanusGraph:
unzip janusgraph-0.3.1-hadoop2.zip -d /opt
In the IDEA project, add the JanusGraph dependency:
<dependencies>
    <dependency>
        <groupId>org.janusgraph</groupId>
        <artifactId>janusgraph-all</artifactId>
        <version>0.3.1</version>
    </dependency>
</dependencies>
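Before touching the OLAP configuration, it helps to confirm JanusGraph can reach HBase through a plain OLTP connection. A minimal sketch (the storage options mirror the properties file below; the class name is hypothetical):

import org.janusgraph.core.JanusGraph;
import org.janusgraph.core.JanusGraphFactory;

// Opens (and on first use, initializes) a JanusGraph store in HBase,
// then counts vertices through the normal OLTP path.
public class JanusConnectTest {
    public static void main(String[] args) {
        JanusGraph graph = JanusGraphFactory.build()
                .set("storage.backend", "hbase")
                .set("storage.hostname", "10.0.10.192")
                .open();
        System.out.println("vertices: " + graph.traversal().V().count().next());
        graph.close();
    }
}

A graph opened this way is also what the commented-out generateModern(graph) call below needs: the HadoopGraph opened from the properties file is read-only, so the sample data has to be written through OLTP first.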
Using /opt/janusgraph-0.3.1-hadoop2/conf/hadoop-graph/read-hbase.properties as the template, modify it and save the result as new-read-hbase.properties:
#
# Hadoop Graph Configuration
#
gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
gremlin.hadoop.graphReader=org.janusgraph.hadoop.formats.hbase.HBaseInputFormat
gremlin.hadoop.graphWriter=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat
gremlin.hadoop.jarsInDistributedCache=true
gremlin.hadoop.inputLocation=none
gremlin.hadoop.outputLocation=output
#
# JanusGraph HBase InputFormat configuration
#
janusgraphmr.ioformat.conf.storage.backend=hbase
janusgraphmr.ioformat.conf.storage.hostname=10.0.10.192
#
# SparkGraphComputer Configuration
#
spark.master=spark://10.0.10.192:7077
spark.executor.memory=512m
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator
spark.driver.memory=1g
# so that the executors can find the JanusGraph jars
spark.executor.extraClassPath=/opt/janusgraph-0.3.1-hadoop2/lib/*
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.T;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.util.GraphFactory;
import org.janusgraph.core.JanusGraph;
import java.util.List;

public class ReadHBaseTest { // class name is arbitrary

    public static void main(String[] args) throws Exception {
        Graph graph = GraphFactory.open("new-read-hbase.properties");
        GraphTraversalSource g = graph.traversal().withComputer(SparkGraphComputer.class);
        // write sample data into the graph first (requires an OLTP JanusGraph connection)
        //generateModern(graph);
        //g.tx().commit();
        List<Vertex> it = g.V().fold().next();
        for (Vertex i : it) {
            System.out.println(i);
        }
        Object r = g.V().count().next();
        System.out.println(r);
        graph.close();
    }

    public static void generateModern(final JanusGraph graph) {
        final Vertex marko = graph.addVertex(T.label, "person", "name", "marko", "age", 29);
        final Vertex vadas = graph.addVertex(T.label, "person", "name", "vadas", "age", 27);
        final Vertex lop = graph.addVertex(T.label, "software", "name", "lop", "lang", "java");
        final Vertex josh = graph.addVertex(T.label, "person", "name", "josh", "age", 32);
        final Vertex ripple = graph.addVertex(T.label, "software", "name", "ripple", "lang", "java");
        final Vertex peter = graph.addVertex(T.label, "person", "name", "peter", "age", 35);
        marko.addEdge("knows", vadas, "weight", 0.5d);
        marko.addEdge("knows", josh, "weight", 1.0d);
        marko.addEdge("created", lop, "weight", 0.4d);
        josh.addEdge("created", ripple, "weight", 1.0d);
        josh.addEdge("created", lop, "weight", 0.4d);
        peter.addEdge("created", lop, "weight", 0.2d);
    }
}
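Once the count works, any OLAP traversal runs the same way. For example, PageRank over the same graph, as a sketch reusing the g from main() above (pageRank() needs a GraphComputer, which withComputer(SparkGraphComputer.class) supplies; the "rank" property key is arbitrary):

// Computes PageRank on the Spark cluster and prints each vertex's name and rank.
g.V().pageRank().by("rank")
        .valueMap("name", "rank")
        .forEachRemaining(System.out::println);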
That completes the single-host setup. Next, the same job on a CDH cluster.
Virtual machine environment:

ip | hostname | memory
---|---|---
10.0.20.20 | cdh1 | 8G
10.0.20.21 | cdh2 | 8G
10.0.20.22 | cdh3 | 8G
The CDH installation itself is not covered here. The online installer uses Java 1.7 by default; after installing, switch it to Java 1.8 manually. The project's pom.xml, with versions matched to CDH 5.16.1:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>yatwong</groupId>
    <artifactId>JanusGraphMavenProject</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.2</version>
                <configuration>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <excludes>
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                            </excludes>
                        </filter>
                    </filters>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.janusgraph</groupId>
            <artifactId>janusgraph-core</artifactId>
            <version>0.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.janusgraph</groupId>
            <artifactId>janusgraph-hadoop</artifactId>
            <version>0.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.tinkerpop</groupId>
            <artifactId>hadoop-gremlin</artifactId>
            <version>3.2.9</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-client</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.tinkerpop</groupId>
            <artifactId>spark-gremlin</artifactId>
            <version>3.2.9</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.spark</groupId>
                    <artifactId>spark-core_2.10</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>1.6.0-cdh5.16.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-shaded-client</artifactId>
            <version>1.2.0-cdh5.16.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-shaded-server</artifactId>
            <version>1.2.0-cdh5.16.1</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.10.5</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.6.6</version>
        </dependency>
    </dependencies>
</project>
Use Maven to download every jar declared in pom.xml, copy them into /opt/lib on each virtual machine, and run chmod a+r /opt/lib/* so the jars are readable. The exclusions above are deliberate: the stock hadoop-client and Apache spark-core_2.10 are replaced by the CDH builds so the driver's classpath matches the cluster. The properties file for the cluster, saved as janusgraph-cdh-spark-hbase.properties (the filename the program below opens):
# Hadoop Graph Configuration
#
gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
gremlin.hadoop.graphReader=org.janusgraph.hadoop.formats.hbase.HBaseInputFormat
gremlin.hadoop.graphWriter=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat
gremlin.hadoop.jarsInDistributedCache=true
gremlin.hadoop.inputLocation=none
gremlin.hadoop.outputLocation=output
#
# JanusGraph HBase InputFormat configuration
#
janusgraphmr.ioformat.conf.storage.backend=hbase
janusgraphmr.ioformat.conf.storage.hostname=10.0.20.20,10.0.20.21,10.0.20.22
#
# SparkGraphComputer Configuration
#
# set to the address of your Spark master
spark.master=spark://10.0.20.22:7077
spark.executor.memory=512m
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator
spark.driver.memory=1g
spark.executor.extraClassPath=/opt/lib/*
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.T;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.util.GraphFactory;
import org.janusgraph.core.JanusGraph;

public class JanusGraphProgramTest {

    public static void main(String[] args) throws Exception {
        Graph graph = GraphFactory.open("janusgraph-cdh-spark-hbase.properties");
        GraphTraversalSource g = graph.traversal().withComputer(SparkGraphComputer.class);
        //generateModern(graph);
        //graph.addVertex();
        //g.tx().commit();
        Object r = g.V().count().next();
        System.out.println("----" + r);
        graph.close();
    }

    public static void generateModern(final JanusGraph graph) {
        final Vertex marko = graph.addVertex(T.label, "person", "name", "marko", "age", 29);
        final Vertex vadas = graph.addVertex(T.label, "person", "name", "vadas", "age", 27);
        final Vertex lop = graph.addVertex(T.label, "software", "name", "lop", "lang", "java");
        final Vertex josh = graph.addVertex(T.label, "person", "name", "josh", "age", 32);
        final Vertex ripple = graph.addVertex(T.label, "software", "name", "ripple", "lang", "java");
        final Vertex peter = graph.addVertex(T.label, "person", "name", "peter", "age", 35);
        marko.addEdge("knows", vadas, "weight", 0.5d);
        marko.addEdge("knows", josh, "weight", 1.0d);
        marko.addEdge("created", lop, "weight", 0.4d);
        josh.addEdge("created", ripple, "weight", 1.0d);
        josh.addEdge("created", lop, "weight", 0.4d);
        peter.addEdge("created", lop, "weight", 0.2d);
    }
}
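As on the single host, heavier OLAP traversals drop in unchanged once the count succeeds; for instance, a per-label vertex histogram computed on the cluster (a sketch reusing the g from main() above):

// Counts vertices per label across the whole graph on the Spark cluster.
System.out.println(g.V().groupCount().by(T.label).next());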