因公司业务发展,需要建立完整的关系图谱检索系统。经过多方比较与考量,我们采用Titan图数据库来处理复杂的逻辑关系。同事已经在《Titan数据库快速入门之神的光芒》一文中详细介绍了Titan数据库的安装与使用,这里不再赘述。本篇博客介绍Titan的实际应用与成果展示,方便读者更好地理解Titan。
本项目基于工程大数据进行研究,下图为HBase中的部分数据展示。通常,一个工程项目有一个或者多个标段,这些标段分别由不同的公司完成,也存在同一标段由多家公司联合完成的情况。例如,如图中所示,上海公路桥梁(集团)有限公司完成了合肥至六安高速公路这一项目的路基十三标的施工,而合肥至六安高速公路下面的剩余标段分别由其他公司完成。因此,施工公司、项目、标段就串成了一个关系图谱,用Titan数据库存储时分别对应图中的点和边。
HBase中已经存入了近几年多家工程公司的业绩信息,但每条施工业绩都是独立的,没有建成一张关系图谱。所以首先要根据采集的数据构建一个关系图谱,存入Titan数据库中。需要说明的是,Titan支持多种存储后端,这里我们采用HBase作为它的存储后端。
下面代码完成的任务是:
从表CompanyInfos_test中遍历所有公司的业绩信息:获取公司名字,存为结点v1,并添加属性group=1;获取标段,存为结点v2,group=2,并在公司和标段之间建立一条边;获取项目名字,存为结点v3,group=3,并在标段和项目之间建立一条边。重复上述过程,即可建立一张把所有公司、标段、项目串起来的关系图谱,并存入HBase中。
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.rednum.graph;
import com.google.gson.Gson;
import com.google.gson.internal.LinkedTreeMap;
import static com.rednum.graph.CompanyCountry.conf;
import static com.rednum.graph.TiTanDB.INDEX_NAME;
import static com.rednum.graph.TiTanDB.load;
import static com.rednum.graph.TiTanDB.query;
import com.thinkaurelius.titan.core.EdgeLabel;
import com.thinkaurelius.titan.core.Multiplicity;
import com.thinkaurelius.titan.core.PropertyKey;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.TitanTransaction;
import com.thinkaurelius.titan.core.attribute.Geoshape;
import com.thinkaurelius.titan.core.attribute.Text;
import com.thinkaurelius.titan.core.schema.ConsistencyModifier;
import com.thinkaurelius.titan.core.schema.TitanGraphIndex;
import com.thinkaurelius.titan.core.schema.TitanManagement;
import com.thinkaurelius.titan.core.util.TitanCleanup;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.configuration.BaseConfiguration;
import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.tinkerpop.gremlin.process.traversal.Order;
import org.apache.tinkerpop.gremlin.process.traversal.P;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.structure.Direction;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.T;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
/**
*
* @author X.H.Yang
*/
public class TiTanNew {
/** Constant holding the string "search". NOTE(review): not referenced by any code visible in this class - confirm it is still needed. */
public static final String INDEX_NAME = "search";
/** HBase client configuration shared by the HBase table accesses in this class; assigned in the static initializer below. */
public static org.apache.hadoop.conf.Configuration conf = null;
static {
// Build the HBase configuration once, when the class is loaded.
conf = HBaseConfiguration.create();
// Set the region-server lease period key to 180000 (presumably milliseconds, i.e. 3 minutes - confirm).
conf.setLong(HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, 180000);
}
/**
 * Opens the Titan graph that the loader writes to.
 *
 * <p>The graph is stored in the HBase table "newgraph" and is configured with an
 * index backend, also named "newgraph", that uses Elasticsearch.
 *
 * @return the opened graph, or {@code null} when opening fails (the exception is
 *         printed to standard output)
 */
public static TitanGraph create() {
    TitanGraph opened;
    try {
        TitanFactory.Builder settings = TitanFactory.build();

        // Storage backend: HBase table "newgraph".
        settings.set("storage.backend", "hbase");
        settings.set("storage.hostname", "192.168.1.252");
        settings.set("storage.hbase.table", "newgraph");

        // Database-level cache settings.
        settings.set("cache.db-cache", "true");
        settings.set("cache.db-cache-clean-wait", "20");
        settings.set("cache.db-cache-time", "180000");
        settings.set("cache.db-cache-size", "0.5");

        // Index backend registered under the name "newgraph".
        settings.set("index.newgraph.backend", "elasticsearch");
        settings.set("index.newgraph.hostname", "192.168.1.212");
        settings.set("index.newgraph.port", 9300);
        settings.set("index.newgraph.elasticsearch.client-only", true);

        opened = settings.open();
    } catch (Exception failure) {
        System.out.println(failure);
        opened = null;
    }
    return opened;
}
/**
 * Defines the schema without any mixed index.
 *
 * <p>Delegates to {@code load(TitanGraph, String, boolean)} with a {@code null}
 * mixed-index name, which makes that method skip both mixed-index definitions.
 *
 * @param graph                    graph whose schema is defined
 * @param uniqueNameCompositeIndex whether the composite index on "name" is unique
 */
public static void loadWithoutMixedIndex(final TitanGraph graph,
        boolean uniqueNameCompositeIndex) {
    final String noMixedIndex = null;
    load(graph, noMixedIndex, uniqueNameCompositeIndex);
}
/**
 * Defines the schema with the defaults used by the loader: mixed indexes on the
 * index backend named "newgraph" and a unique composite index on "name".
 *
 * @param graph graph whose schema is defined
 */
public static void load(final TitanGraph graph) {
    final String mixedIndexBackend = "newgraph";
    final boolean uniqueNames = true;
    load(graph, mixedIndexBackend, uniqueNames);
}
/**
 * Defines the graph schema: property keys, indexes and edge labels.
 *
 * <ul>
 *   <li>"name": String key with a composite vertex index (optionally unique,
 *       with LOCK consistency) so companies can be looked up quickly.</li>
 *   <li>"group": Integer key, added to the mixed vertex index "vertices".</li>
 *   <li>"projectname" / "sectionname": String keys, added to the mixed edge
 *       index "edges".</li>
 *   <li>Edge labels "ComSec" (company to section) and "SecPro" (section to project).</li>
 * </ul>
 *
 * <p>The call is a no-op when the "name" key already exists, so it can be run
 * again against an already-initialised graph. Any failure is printed to standard
 * output and the management transaction is rolled back, so nothing is left
 * half-defined or open.
 *
 * @param graph                    graph whose schema is defined
 * @param mixedIndexName           name of the configured index backend for the mixed
 *                                 indexes, or {@code null} to skip them
 * @param uniqueNameCompositeIndex whether the composite index on "name" is unique
 */
public static void load(final TitanGraph graph, String mixedIndexName,
        boolean uniqueNameCompositeIndex) {
    TitanManagement mgmt = null;
    try {
        mgmt = graph.openManagement();

        // The whole schema is created in one management transaction, so the
        // presence of "name" means a previous run already defined everything.
        if (mgmt.containsPropertyKey("name")) {
            System.out.println("Schema already defined; skipping schema creation.");
            mgmt.rollback();
            return;
        }

        final PropertyKey name = mgmt.makePropertyKey("name").dataType(String.class).make();
        TitanManagement.IndexBuilder nameIndexBuilder = mgmt.buildIndex("name", Vertex.class).addKey(name);
        if (uniqueNameCompositeIndex) {
            nameIndexBuilder.unique();
        }
        TitanGraphIndex namei = nameIndexBuilder.buildCompositeIndex();
        mgmt.setConsistency(namei, ConsistencyModifier.LOCK);

        final PropertyKey group = mgmt.makePropertyKey("group").dataType(Integer.class).make();
        if (null != mixedIndexName) {
            mgmt.buildIndex("vertices", Vertex.class).addKey(group).buildMixedIndex(mixedIndexName);
        }

        final PropertyKey projectname = mgmt.makePropertyKey("projectname").dataType(String.class).make();
        final PropertyKey sectionname = mgmt.makePropertyKey("sectionname").dataType(String.class).make();
        if (null != mixedIndexName) {
            mgmt.buildIndex("edges", Edge.class).addKey(projectname).addKey(sectionname).buildMixedIndex(mixedIndexName);
        }

        // A company has one outgoing "ComSec" edge per section. MANY2ONE would allow
        // at most ONE outgoing edge per vertex and reject the second section, so the
        // label must not restrict outgoing edges.
        mgmt.makeEdgeLabel("ComSec").multiplicity(Multiplicity.MULTI).make();
        // A section belongs to exactly one project, so MANY2ONE is correct here.
        mgmt.makeEdgeLabel("SecPro").multiplicity(Multiplicity.MANY2ONE).make();
        mgmt.commit();
    } catch (Exception e) {
        System.out.println(e);
        // Do not leave the management transaction open after a failure.
        if (mgmt != null && mgmt.isOpen()) {
            try {
                mgmt.rollback();
            } catch (Exception rollbackFailure) {
                System.out.println(rollbackFailure);
            }
        }
    }
}
/**
 * Builds the company / section / project graph from the HBase table
 * "CompanyInfos_test" and stores it in the Titan graph.
 *
 * <p>For every row (one company) a "company" vertex (group 1) is created unless it
 * already exists. Every cell of the "performance_owner" family then yields a
 * "section" vertex (group 2) linked from the company by a "ComSec" edge and, when
 * a project name can be resolved, a shared "project" vertex (group 3) linked from
 * the section by a "SecPro" edge.
 *
 * <p>When loading a company fails, the uncommitted work is rolled back, the
 * partially stored company and its sections are removed, and the scan is
 * restarted; already loaded companies are skipped. If the same company fails
 * several times in a row the load is aborted instead of retrying forever.
 * The HBase table, the scanner and the graph are always closed.
 */
public void doCreatCompanyGraph() {
    TitanGraph graph = create();
    if (graph == null) {
        // create() has already printed the reason; nothing can be loaded.
        return;
    }
    try {
        load(graph); // define property keys, indexes and edge labels
        GraphTraversalSource g = graph.traversal();

        final int maxConsecutiveFailures = 3;
        String lastFailedCompany = null;
        int consecutiveFailures = 0;

        while (true) {
            String current = "";
            try (HTable companies = new HTable(conf, "CompanyInfos_test");
                 ResultScanner rows = companies.getScanner(new Scan())) {
                int count = 0;
                for (Result r : rows) {
                    count++;
                    String row = Bytes.toString(r.getRow());
                    current = row;
                    // "name" is uniquely indexed: never add the same company twice.
                    if (g.V().has("group", 1).has("name", row).hasNext()) {
                        continue;
                    }
                    Vertex company = graph.addVertex(T.label, "company", "name", row, "group", 1);

                    // Fetch every column of the performance_owner family for this company.
                    Get get = new Get(Bytes.toBytes(row));
                    get.addFamily(Bytes.toBytes("performance_owner"));
                    Result performances = companies.get(get);
                    for (Cell cell : performances.rawCells()) {
                        addPerformance(graph, g, company, row, Bytes.toString(CellUtil.cloneValue(cell)));
                        graph.tx().commit();
                    }
                    graph.tx().commit();
                    System.out.println(row + ": 第" + count + "家公司");
                }
                System.out.println("共有数据" + count);
                break;
            } catch (Exception e) {
                System.out.println(e.toString());
                System.out.println("公司:" + current + "捕获异常");
                try {
                    // Discard whatever was not committed, then remove what was.
                    graph.tx().rollback();
                    removeCompany(graph, g, current);
                } catch (Exception cleanupFailure) {
                    System.out.println(cleanupFailure);
                    System.out.println("公司:" + current + "清理失败,终止导入");
                    break;
                }
                if (current != null && current.equals(lastFailedCompany)) {
                    consecutiveFailures++;
                } else {
                    lastFailedCompany = current;
                    consecutiveFailures = 1;
                }
                if (consecutiveFailures >= maxConsecutiveFailures) {
                    System.out.println("公司:" + current + "连续失败" + consecutiveFailures + "次,终止导入");
                    break;
                }
            }
        }
        System.out.println(g.V().count().next());
        System.out.println(g.E().count().next());
    } finally {
        graph.close();
    }
}

/** Adds the section (and, if known, project) vertices and edges for one performance record of a company. */
private void addPerformance(TitanGraph graph, GraphTraversalSource g, Vertex company,
        String companyName, String performance) throws IOException {
    if (performance == null || performance.isEmpty()) {
        // An empty cell carries no section and cannot be used as an HBase row key.
        return;
    }
    String project = getProjectByPer(performance);

    // The section name is the performance text without the company and project names.
    String section = performance.replace(companyName, "");
    if (!"".equals(project)) {
        section = section.replace(project, "");
    }
    if ("/".equals(section) || "".equals(section)) {
        section = project;
    }
    if ("".equals(section)) {
        // Neither a section nor a project name is available.
        return;
    }

    Vertex sectionVertex = graph.addVertex(T.label, "section", "sectionname", section, "group", 2);
    company.addEdge("ComSec", sectionVertex);

    if (!"".equals(project)) {
        // Projects are shared between sections: reuse the vertex when it exists.
        GraphTraversal<Vertex, Vertex> existing = g.V().has("group", 3).has("projectname", project);
        Vertex projectVertex = existing.hasNext()
                ? existing.next()
                : graph.addVertex(T.label, "project", "projectname", project, "group", 3);
        sectionVertex.addEdge("SecPro", projectVertex);
    }
}

/** Removes a company vertex and the section vertices it points to; does nothing if the company is not stored. */
private void removeCompany(TitanGraph graph, GraphTraversalSource g, String companyName) {
    if (companyName == null) {
        return;
    }
    GraphTraversal<Vertex, Vertex> match = g.V().has("group", 1).has("name", companyName);
    if (!match.hasNext()) {
        return;
    }
    Vertex company = match.next();
    g.V(company).out("ComSec").drop().iterate();
    g.V(company).drop().iterate();
    graph.tx().commit();
}
/**
 * Looks up the project name of a performance record.
 *
 * <p>Reads column {@code ProjectInfo:ProjectName} of the row keyed by {@code per}
 * in the HBase table "CompanyOwner_test".
 *
 * @param per performance text used as the row key
 * @return the project name, or an empty string when {@code per} is null or empty
 *         or the row has no such column
 * @throws IOException if the HBase table cannot be opened or read
 */
public String getProjectByPer(String per) throws IOException {
    if (per == null || per.isEmpty()) {
        // An empty row key is rejected by Get; there is nothing to look up anyway.
        return "";
    }
    // The table is opened per call, so it must also be closed per call.
    try (HTable table = new HTable(conf, "CompanyOwner_test")) {
        Get get = new Get(Bytes.toBytes(per)); // Bytes.toBytes encodes as UTF-8
        get.addColumn(Bytes.toBytes("ProjectInfo"), Bytes.toBytes("ProjectName"));
        Result rs = table.get(get);
        String project = "";
        for (Cell cell : rs.rawCells()) {
            project = Bytes.toString(CellUtil.cloneValue(cell));
        }
        return project;
    }
}
/**
 * Command-line entry point: builds the company graph.
 *
 * @param args unused
 * @throws Exception any failure of the load, rethrown after being reported so the
 *                   JVM prints the stack trace and exits with a non-zero status
 */
public static void main(String[] args) throws Exception {
    try {
        TiTanNew titan = new TiTanNew();
        titan.doCreatCompanyGraph();
    } catch (Exception e) {
        // Previously the exception was discarded (e.toString() with no output),
        // which made a failed load look like a successful one.
        System.err.println("Building the company graph failed: " + e);
        throw e;
    }
}
}
下面给出模糊搜索项目:“遵义至毕节高速公路”的实现过程。
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.rednum.graph;
import com.google.gson.Gson;
import com.thinkaurelius.titan.core.TitanException;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.TitanVertex;
import com.thinkaurelius.titan.core.attribute.Text;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.commons.configuration.BaseConfiguration;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.structure.Direction;
import org.apache.tinkerpop.gremlin.structure.Vertex;
/**
*
* @author X.H.Yang
*/
public class CompanyCountrySearch {
private Gson gson = new Gson();
/**
 * Opens the Titan graph stored in the HBase table "newgraph".
 *
 * @return the opened graph, or {@code null} when opening fails (the exception is
 *         printed to standard output)
 */
public static TitanGraph open() {
    try {
        org.apache.commons.configuration.Configuration settings = new BaseConfiguration();
        // Storage backend: HBase table "newgraph" on the given host.
        settings.setProperty("storage.backend", "hbase");
        settings.setProperty("storage.hostname", "192.168.1.252");
        settings.setProperty("storage.hbase.table", "newgraph");
        return TitanFactory.open(settings);
    } catch (Exception failure) {
        System.out.println(failure);
    }
    return null;
}
public String doSearch() {
TitanGraph graph = CompanyCountrySearch.open();
GraphTraversalSource g = graph.traversal();
try {
String jstr = "";
Iterable mm = graph.query().has("group", 3).has("projectname", Text.REGEX, ".*遵义至毕节高速公路.*").vertices();
for (TitanVertex tt : mm) {
HashMap params = new HashMap<>();
List
我们也可根据公司名字检索出相关联的项目和标段信息,代码如下:
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.rednum.graph;
import com.google.gson.Gson;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanGraph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.configuration.BaseConfiguration;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.structure.Vertex;
/**
*
* @author X.H.Yang
*/
public class TitanSearchByCompany {
private Gson gson = new Gson();
/**
 * Opens the Titan graph stored in the HBase table "newgraph".
 *
 * @return the opened graph, or {@code null} when opening fails (the exception is
 *         printed to standard output)
 */
public static TitanGraph open() {
    TitanGraph opened = null;
    try {
        org.apache.commons.configuration.Configuration hbaseSettings = new BaseConfiguration();
        // Storage backend: HBase table "newgraph" on the given host.
        hbaseSettings.setProperty("storage.backend", "hbase");
        hbaseSettings.setProperty("storage.hostname", "192.168.1.252");
        hbaseSettings.setProperty("storage.hbase.table", "newgraph");
        opened = TitanFactory.open(hbaseSettings);
    } catch (Exception problem) {
        System.out.println(problem);
        opened = null;
    }
    return opened;
}
public String searchBycom() {
TitanGraph graph = open();
GraphTraversalSource g = graph.traversal();
try {
HashMap params = new HashMap<>();
List
由于Titan数据库没有可视化界面,所以我们在web平台上开发了根据搜索内容,呈现关系图谱的功能,图的展示主要用了D3中的力导向图,力导向图的实现将在下一篇文中由前端同事介绍。
通过下面的搜索界面,得到公司或者项目的业绩图谱。
搜索“中铁十局集团有限公司”得到的关系图为:(效果图是不是很炫)
搜索项目“雅安至康定”得到下面的关系图谱,由于屏幕有限,只展示一层的关系。
文章到此结束,这是小编学了Titan数据库后完成的第一个项目,如有不对的地方,请在下方留言指正。感兴趣的读者也欢迎一起交流。