序
BerkeleyDB 在爬虫、搜索领域用得比较多,它是一个嵌入式的 KV 数据库,功能强大,能支持几百 TB 的存储。本文主要讲述如何在 Java 中使用它。
添加依赖
<dependency>
    <groupId>com.sleepycat</groupId>
    <artifactId>je</artifactId>
    <version>6.4.9</version>
</dependency>
如果是5以上的版本,则需要添加oracle的仓库
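<repositories>
    <repository>
        <id>oracleReleases</id>
        <name>Oracle Released Java Packages</name>
        <url>http://download.oracle.com/maven</url>
        <layout>default</layout>
    </repository>
</repositories>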
使用方式
在 Java 中主要有两种使用方式:一种是基于注解的方式,一种是直接使用原始的 API。本文主要使用注解方式。
领域模型
/**
 * An entity class keyed by social security number.
 *
 * <p>Besides the primary key it declares three secondary keys: the parent's SSN
 * (many persons to one parent {@code Person}), the e-mail addresses (one person
 * to many addresses) and the employer ids (many-to-many with {@code Employer};
 * a deleted employer's id is nullified, i.e. removed from the set).
 *
 * <p>The collections carry explicit element types so that the key type of each
 * multi-valued secondary key is stated in the declaration instead of being a raw
 * {@code Set}.
 */
@Entity
public class Person {
    @PrimaryKey
    String ssn;
    String name;
    Address address;
    @SecondaryKey(relate = MANY_TO_ONE, relatedEntity = Person.class)
    String parentSsn;
    @SecondaryKey(relate = ONE_TO_MANY)
    Set<String> emailAddresses = new HashSet<String>();
    @SecondaryKey(relate = MANY_TO_MANY,
            relatedEntity = Employer.class,
            onRelatedEntityDelete = NULLIFY)
    Set<Long> employerIds = new HashSet<Long>();

    /**
     * Creates a person.
     *
     * @param name      display name
     * @param ssn       primary key
     * @param parentSsn primary key of the parent person, or {@code null} if there is none
     */
    public Person(String name, String ssn, String parentSsn) {
        this.name = name;
        this.ssn = ssn;
        this.parentSsn = parentSsn;
    }

    private Person() {
    } // For deserialization

    public String getSsn() {
        return ssn;
    }

    public void setSsn(String ssn) {
        this.ssn = ssn;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Address getAddress() {
        return address;
    }

    public void setAddress(Address address) {
        this.address = address;
    }

    public String getParentSsn() {
        return parentSsn;
    }

    public void setParentSsn(String parentSsn) {
        this.parentSsn = parentSsn;
    }

    /** Returns the live (mutable) set of e-mail addresses. */
    public Set<String> getEmailAddresses() {
        return emailAddresses;
    }

    public void setEmailAddresses(Set<String> emailAddresses) {
        this.emailAddresses = emailAddresses;
    }

    /** Returns the live (mutable) set of employer primary keys. */
    public Set<Long> getEmployerIds() {
        return employerIds;
    }

    public void setEmployerIds(Set<Long> employerIds) {
        this.employerIds = employerIds;
    }
}
内嵌对象
/**
 * A persistent (non-entity) value class that is embedded in other classes
 * through their {@code Address} fields.
 */
@Persistent
public class Address {
    String street;
    String city;
    String state;
    int zipCode;

    /** No-argument constructor, needed for deserialization. */
    public Address() {
    }

    /** @return the street line */
    public String getStreet() {
        return this.street;
    }

    /** @param street the street line */
    public void setStreet(String street) {
        this.street = street;
    }

    /** @return the city */
    public String getCity() {
        return this.city;
    }

    /** @param city the city */
    public void setCity(String city) {
        this.city = city;
    }

    /** @return the state */
    public String getState() {
        return this.state;
    }

    /** @param state the state */
    public void setState(String state) {
        this.state = state;
    }

    /** @return the zip code */
    public int getZipCode() {
        return this.zipCode;
    }

    /** @param zipCode the zip code */
    public void setZipCode(int zipCode) {
        this.zipCode = zipCode;
    }
}
关联
/**
 * An employer entity. Its primary key is drawn from the sequence named
 * {@code "ID"}, and its name is declared as a one-to-one secondary key.
 */
@Entity
public class Employer {
    @PrimaryKey(sequence = "ID")
    private long id;
    @SecondaryKey(relate = ONE_TO_ONE)
    private String name;
    private Address address;

    /**
     * Creates an employer with the given name; the id is left at its default.
     *
     * @param name the employer name
     */
    public Employer(String name) {
        this.name = name;
    }

    /** No-argument constructor, needed for deserialization. */
    private Employer() {
    }

    /** @return the primary key */
    public long getId() {
        return this.id;
    }

    /** @param id the primary key */
    public void setId(long id) {
        this.id = id;
    }

    /** @return the employer name */
    public String getName() {
        return this.name;
    }

    /** @param name the employer name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the embedded address, possibly {@code null} */
    public Address getAddress() {
        return this.address;
    }

    /** @param address the embedded address */
    public void setAddress(Address address) {
        this.address = address;
    }
}
初始化及关闭操作
/* Environment and entity store handles; opened in prepare(), released in close(). */
private Environment myEnv;
private EntityStore store;
/*
 * Inventory / Vendor accessors. The type parameters mirror the key and entity
 * classes passed to getPrimaryIndex()/getSecondaryIndex() in prepare(); without
 * them get() and the cursors would only yield Object.
 */
private PrimaryIndex<String, Inventory> inventoryBySku;
private PrimaryIndex<String, Vendor> vendorByName;
private SecondaryIndex<String, String, Inventory> inventoryByName;
/* Employer accessors */
PrimaryIndex<Long, Employer> employerById;
SecondaryIndex<String, Long, Employer> employerByName;
/* Person accessors */
PrimaryIndex<String, Person> personBySsn;
SecondaryIndex<String, String, Person> personByParentSsn;
SecondaryIndex<String, String, Person> personByEmailAddresses;
SecondaryIndex<Long, String, Person> personByEmployerIds;
/* Environment home: the "bdb" directory under the current working directory. */
private File envHome = new File(System.getProperty("user.dir") + File.separator + "bdb");
private boolean readOnly = false;
/**
 * Opens the environment and the entity store under {@code envHome} and looks up
 * every primary and secondary index used by the tests.
 *
 * @throws IllegalStateException if the environment home directory does not exist
 *                               and cannot be created
 */
@Before
public void prepare() {
    EnvironmentConfig myEnvConfig = new EnvironmentConfig();
    StoreConfig storeConfig = new StoreConfig();
    myEnvConfig.setReadOnly(readOnly);
    storeConfig.setReadOnly(readOnly);
    // If the environment is opened for write, then we want to be
    // able to create the environment and entity store if
    // they do not exist.
    myEnvConfig.setAllowCreate(!readOnly);
    storeConfig.setAllowCreate(!readOnly);
    // Open the environment and entity store
    System.out.println(envHome.getAbsolutePath());
    // Fail with a clear message instead of ignoring a failed directory creation.
    if (!envHome.exists() && !envHome.mkdirs()) {
        throw new IllegalStateException(
                "Cannot create environment home: " + envHome.getAbsolutePath());
    }
    myEnv = new Environment(envHome, myEnvConfig);
    store = new EntityStore(myEnv, "EntityStore", storeConfig);
    // Primary key for Inventory classes
    inventoryBySku = store.getPrimaryIndex(String.class, Inventory.class);
    // Secondary key for Inventory classes
    // Last field in the getSecondaryIndex() method must be
    // the name of a class member; in this case, an Inventory.class
    // data member.
    inventoryByName = store.getSecondaryIndex(inventoryBySku, String.class, "itemName");
    // Primary key for Vendor class
    vendorByName = store.getPrimaryIndex(String.class, Vendor.class);
    // Employer: primary index by id, secondary index by name
    employerById = store.getPrimaryIndex(Long.class, Employer.class);
    employerByName = store.getSecondaryIndex(employerById, String.class, "name");
    // Person: primary index by ssn plus one secondary index per @SecondaryKey field
    personBySsn = store.getPrimaryIndex(String.class, Person.class);
    personByParentSsn = store.getSecondaryIndex(personBySsn, String.class, "parentSsn");
    personByEmailAddresses = store.getSecondaryIndex(personBySsn, String.class, "emailAddresses");
    personByEmployerIds = store.getSecondaryIndex(personBySsn, Long.class, "employerIds");
}
/**
 * Releases the entity store and then the environment. A failure while closing
 * either one is printed and does not prevent the other from being closed.
 */
@After
public void close() {
    // The store is closed before the environment that hosts it.
    try {
        if (store != null) {
            store.close();
        }
    } catch (DatabaseException storeFailure) {
        storeFailure.printStackTrace();
    }
    // Finally, close the environment.
    try {
        if (myEnv != null) {
            myEnv.close();
        }
    } catch (DatabaseException envFailure) {
        envFailure.printStackTrace();
    }
}
增删改查
添加数据
/**
 * Loads the classpath resources {@code vendors.txt} and {@code inventory.txt}
 * (one record per line, fields separated by {@code #}) and stores each line as a
 * {@code Vendor} or {@code Inventory} entity.
 *
 * @throws IOException if a resource cannot be read
 */
@Test
public void putData() throws IOException {
    List<String> vendorLines = Resources.readLines(
            this.getClass().getClassLoader().getResource("vendors.txt"), Charsets.UTF_8);
    for (String data : vendorLines) {
        String[] sArray = data.split("#");
        Vendor theVendor = new Vendor();
        theVendor.setVendorName(sArray[0]);
        theVendor.setAddress(sArray[1]);
        theVendor.setCity(sArray[2]);
        theVendor.setState(sArray[3]);
        theVendor.setZipcode(sArray[4]);
        theVendor.setBusinessPhoneNumber(sArray[5]);
        theVendor.setRepName(sArray[6]);
        theVendor.setRepPhoneNumber(sArray[7]);
        // Put it in the store; no explicit transaction is passed.
        // NOTE(review): the earlier comment said auto-commit applies because the
        // store is transactional, but prepare() never enables transactions on
        // either config - confirm the intended configuration.
        vendorByName.put(theVendor);
    }
    // The inventoryBySku field initialised in prepare() is reused here rather than
    // shadowed by a second, untyped lookup of the same primary index.
    List<String> inventoryLines = Resources.readLines(
            this.getClass().getClassLoader().getResource("inventory.txt"), Charsets.UTF_8);
    for (String row : inventoryLines) {
        String[] sArray = row.split("#");
        Inventory theInventory = new Inventory();
        theInventory.setItemName(sArray[0]);
        theInventory.setSku(sArray[1]);
        // parseFloat/parseInt avoid the deprecated boxing constructors.
        theInventory.setVendorPrice(Float.parseFloat(sArray[2]));
        theInventory.setVendorInventory(Integer.parseInt(sArray[3]));
        theInventory.setCategory(sArray[4]);
        theInventory.setVendor(sArray[5]);
        // Put it in the store. Note that this causes our secondary key
        // to be automatically updated for us.
        inventoryBySku.put(theInventory);
    }
}
查询数据
/**
 * Prints every inventory entity whose {@code itemName} secondary key is
 * {@code "Oranges"}.
 */
@Test
public void getInventoryData() {
    // Use the inventory name secondary key to retrieve these objects.
    // The cursor is typed so that the for-each below can iterate Inventory values.
    EntityCursor<Inventory> items =
            inventoryByName.subIndex("Oranges").entities();
    try {
        for (Inventory item : items) {
            System.out.println(ToStringBuilder.reflectionToString(item));
        }
    } finally {
        // Cursors must always be closed, even if iteration fails.
        items.close();
    }
}
/**
 * Prints every inventory entity in the store by walking the primary index.
 */
@Test
public void getAllInventory() {
    // Get a cursor that will walk every inventory object in the store.
    // The cursor is typed so that the for-each below can iterate Inventory values.
    EntityCursor<Inventory> items = inventoryBySku.entities();
    try {
        for (Inventory item : items) {
            System.out.println(ToStringBuilder.reflectionToString(item));
        }
    } finally {
        // Cursors must always be closed, even if iteration fails.
        items.close();
    }
}
更新
如果没有开启允许重复记录,对已存在的主键再次执行 put 就是更新。
/**
 * Stores an inventory record, reads it back, changes its vendor and stores it
 * again under the same primary key, printing the record before and after.
 */
@Test
public void update() {
    final String sku = "apple-for-update";

    // Initial version of the record.
    Inventory original = new Inventory();
    original.setItemName("Apples");
    original.setSku(sku);
    original.setVendorPrice(1.20f);
    original.setVendorInventory(728);
    original.setCategory("fruits");
    original.setVendor("Off the Vine");
    inventoryBySku.put(original);

    // Read back, modify, and put again with the same key.
    Inventory stored = inventoryBySku.get(sku);
    System.out.println(ToStringBuilder.reflectionToString(stored));
    stored.setVendor("vendor update");
    inventoryBySku.put(stored);

    Inventory reloaded = inventoryBySku.get(sku);
    System.out.println(ToStringBuilder.reflectionToString(reloaded));
}
删除
/**
 * Stores an inventory record, deletes it by primary key and verifies that the
 * delete reported success and the record can no longer be read.
 */
@Test
public void delete() {
    final String sku = "apple-for-update";

    // Make sure there is something to delete.
    Inventory doomed = new Inventory();
    doomed.setItemName("Apples");
    doomed.setSku(sku);
    doomed.setVendorPrice(1.20f);
    doomed.setVendorInventory(728);
    doomed.setCategory("fruits");
    doomed.setVendor("Off the Vine");
    inventoryBySku.put(doomed);

    Inventory stored = inventoryBySku.get(sku);
    System.out.println(ToStringBuilder.reflectionToString(stored));

    // delete() reports whether an entity with that key was removed.
    Assert.assertTrue(inventoryBySku.delete(sku));
    Assert.assertNull(inventoryBySku.get(sku));
}
统计
/**
 * Prints the number of employer entities in the store.
 *
 * <p>The count is taken from the primary index itself. A cursor's
 * {@code count()} reports the number of values (duplicates) for the key at the
 * cursor position, which on a primary index is not the size of the index, so
 * the earlier cursor-based version did not report the total.
 */
@Test
public void count() {
    long count = employerById.count();
    System.out.println("employee count:" + count);
}
级联及主键自增情况
/**
 * Exercises related entities and sequence-assigned primary keys: stores a parent
 * and two children, looks the children up through the parent-SSN sub-index,
 * creates two employers, links them and two e-mail addresses to the parent, and
 * finally checks that deleting an employer removes its id from the person.
 */
@Test
public void sequencePk() {
    /*
     * Add a parent and two children using the Person primary index.
     * Specifying a non-null parentSsn adds the child Person to the
     * sub-index of children for that parent key.
     */
    personBySsn.put(new Person("Bob Smith", "111-11-1111", null));
    personBySsn.put(new Person("Mary Smith", "333-33-3333", "111-11-1111"));
    personBySsn.put(new Person("Jack Smith", "222-22-2222", "111-11-1111"));
    /* Print the children of a parent using a sub-index and a cursor. */
    EntityCursor<Person> children = personByParentSsn.subIndex("111-11-1111").entities();
    try {
        for (Person child : children) {
            System.out.println(child.getSsn() + ' ' + child.getName());
        }
    } finally {
        children.close();
    }
    /* Get Bob by primary key using the primary index. */
    Person bob = personBySsn.get("111-11-1111");
    Assert.assertNotNull(bob);
    /*
     * Create two employers if they do not already exist. Their primary
     * keys are assigned from a sequence.
     */
    Employer gizmoInc = employerByName.get("Gizmo Inc");
    if (gizmoInc == null) {
        gizmoInc = new Employer("Gizmo Inc");
        employerById.put(gizmoInc);
    }
    Employer gadgetInc = employerByName.get("Gadget Inc");
    if (gadgetInc == null) {
        gadgetInc = new Employer("Gadget Inc");
        employerById.put(gadgetInc);
    }
    /*
     * Bob has two jobs and two email addresses. The two addresses must be
     * distinct: identical literals would collapse into a single set element.
     */
    bob.getEmployerIds().add(gizmoInc.getId());
    bob.getEmployerIds().add(gadgetInc.getId());
    bob.getEmailAddresses().add("bob@bob.com");
    bob.getEmailAddresses().add("bob@gmail.com");
    /* Update Bob's record. */
    personBySsn.put(bob);
    /* Bob can now be found by both email addresses. */
    bob = personByEmailAddresses.get("bob@bob.com");
    Assert.assertNotNull(bob);
    bob = personByEmailAddresses.get("bob@gmail.com");
    Assert.assertNotNull(bob);
    /* Bob can also be found as an employee of both employers. */
    EntityIndex<String, Person> employees;
    employees = personByEmployerIds.subIndex(gizmoInc.getId());
    Assert.assertTrue(employees.contains("111-11-1111"));
    employees = personByEmployerIds.subIndex(gadgetInc.getId());
    Assert.assertTrue(employees.contains("111-11-1111"));
    /*
     * When an employer is deleted, the onRelatedEntityDelete=NULLIFY for
     * the employerIds key causes the deleted ID to be removed from Bob's
     * employerIds.
     */
    employerById.delete(gizmoInc.getId());
    bob = personBySsn.get("111-11-1111");
    Assert.assertNotNull(bob);
    Assert.assertFalse(bob.getEmployerIds().contains(gizmoInc.getId()));
}
/**
 * Prints every employer using a cursor opened without a transaction and with a
 * cursor configuration that requests read-uncommitted isolation.
 */
@Test
public void cursor() {
    CursorConfig cc = new CursorConfig();
    // This is ignored if the store is not opened with uncommitted read
    // support.
    cc.setReadUncommitted(true);
    // The cursor is typed so that the for-each below can iterate Employer values.
    EntityCursor<Employer> employers = employerById.entities(null, cc);
    try {
        for (Employer employer : employers) {
            System.out.println(ToStringBuilder.reflectionToString(employer));
        }
    } finally {
        // Cursors must always be closed, even if iteration fails.
        employers.close();
    }
}
本工程github
参考
Oracle Berkeley DB Java Edition, 12c Release 1