(1) Hadoop 2.7.1 source build: http://aperise.iteye.com/blog/2246856
(2) Hadoop 2.7.1 installation preparation: http://aperise.iteye.com/blog/2253544
(3) Cluster installation (works for both 1.x and 2.x): http://aperise.iteye.com/blog/2245547
(4) HBase installation preparation: http://aperise.iteye.com/blog/2254451
(5) HBase installation: http://aperise.iteye.com/blog/2254460
(6) Snappy installation: http://aperise.iteye.com/blog/2254487
(7) HBase performance tuning: http://aperise.iteye.com/blog/2282670
(8) HBase performance testing with Yahoo's YCSBC: http://aperise.iteye.com/blog/2248863
(9) spring-hadoop in practice: http://aperise.iteye.com/blog/2254491
(10) ZooKeeper-based Hadoop HA cluster installation: http://aperise.iteye.com/blog/2305809
1.http://spring.io/blog/2015/02/09/spring-for-apache-hadoop-2-1-released
2.http://docs.spring.io/spring-hadoop/docs/current/reference/html/
The two links above are good reference documents. The project is not finished yet; I will post the complete project source code once it is done. For now, here are the key pieces:
1. Add the spring-data-hadoop dependencies to the Maven project
<dependencies>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-core</artifactId>
        <version>4.1.6.RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-beans</artifactId>
        <version>4.1.6.RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-context</artifactId>
        <version>4.1.6.RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-jdbc</artifactId>
        <version>4.1.6.RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-context-support</artifactId>
        <version>4.1.6.RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.springframework.data</groupId>
        <artifactId>spring-data-hadoop</artifactId>
        <version>2.2.0.RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.springframework.data</groupId>
        <artifactId>spring-data-hadoop-store</artifactId>
        <version>2.2.0.RELEASE</version>
        <exclusions>
            <exclusion><groupId>javax.servlet</groupId><artifactId>servlet-api</artifactId></exclusion>
            <exclusion><groupId>io.netty</groupId><artifactId>netty</artifactId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.xerial.snappy</groupId>
        <artifactId>snappy-java</artifactId>
        <version>1.1.0</version>
        <scope>runtime</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0</version>
        <scope>compile</scope>
        <exclusions>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jetty</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jetty-util</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jsp-2.1</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jsp-api-2.1</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>servlet-api-2.1</artifactId></exclusion>
            <exclusion><groupId>javax.servlet</groupId><artifactId>servlet-api</artifactId></exclusion>
            <exclusion><groupId>javax.servlet.jsp</groupId><artifactId>jsp-api</artifactId></exclusion>
            <exclusion><groupId>tomcat</groupId><artifactId>jasper-compiler</artifactId></exclusion>
            <exclusion><groupId>tomcat</groupId><artifactId>jasper-runtime</artifactId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-auth</artifactId>
        <version>2.6.0</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>0.98.5-hadoop2</version>
        <exclusions>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jetty</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jetty-util</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jsp-2.1</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>jsp-api-2.1</artifactId></exclusion>
            <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>servlet-api-2.1</artifactId></exclusion>
            <exclusion><groupId>tomcat</groupId><artifactId>jasper-compiler</artifactId></exclusion>
            <exclusion><groupId>tomcat</groupId><artifactId>jasper-runtime</artifactId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>0.98.5-hadoop2</version>
        <scope>compile</scope>
        <exclusions>
            <exclusion><groupId>log4j</groupId><artifactId>log4j</artifactId></exclusion>
            <exclusion><groupId>org.slf4j</groupId><artifactId>slf4j-log4j12</artifactId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-common</artifactId>
        <version>0.98.5-hadoop2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-protocol</artifactId>
        <version>0.98.5-hadoop2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.zookeeper</groupId>
        <artifactId>zookeeper</artifactId>
        <version>3.4.6</version>
        <exclusions>
            <exclusion><groupId>io.netty</groupId><artifactId>netty</artifactId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.17</version>
    </dependency>
</dependencies>
2. The spring-data-hadoop configuration file for Hadoop 1.x (NameNode + SecondaryNameNode) uses the following properties (a sketch of the full XML follows the list):
fs.defaultFS=hdfs://192.168.202.131:9000/
dfs.replication=3
dfs.client.socket-timeout=600000
hbase.rootdir=hdfs://192.168.202.131:9000/hbase
dfs.replication=3
dfs.client.socket-timeout=600000
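Below is a minimal applicationContext.xml sketch that wires these properties through the hdp namespace of Spring for Apache Hadoop. The element layout and the bean ids (hadoopConfiguration, hadoop-cluster, hbaseConfiguration, hbaseTemplate) are my assumptions, chosen so that the FileSystem bean "hadoop-cluster" and the autowired HbaseTemplate match the Java code later in this post:
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:hdp="http://www.springframework.org/schema/hadoop"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
                           http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd">

    <!-- Hadoop client configuration carrying the properties listed above -->
    <hdp:configuration id="hadoopConfiguration">
        fs.defaultFS=hdfs://192.168.202.131:9000/
        dfs.replication=3
        dfs.client.socket-timeout=600000
    </hdp:configuration>

    <!-- FileSystem bean looked up by name ("hadoop-cluster") in the FileSystemUtil class below -->
    <hdp:file-system id="hadoop-cluster" configuration-ref="hadoopConfiguration"/>

    <!-- HBase client configuration carrying the hbase.* properties listed above -->
    <hdp:hbase-configuration id="hbaseConfiguration" configuration-ref="hadoopConfiguration">
        hbase.rootdir=hdfs://192.168.202.131:9000/hbase
        dfs.replication=3
        dfs.client.socket-timeout=600000
    </hdp:hbase-configuration>

    <!-- HbaseTemplate autowired into the HbaseService class below -->
    <bean id="hbaseTemplate" class="org.springframework.data.hadoop.hbase.HbaseTemplate">
        <property name="configuration" ref="hbaseConfiguration"/>
    </bean>
</beans>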
3. The spring-data-hadoop configuration file for Hadoop 2.x HA uses the following properties (again, a sketch of the full XML follows the list):
fs.defaultFS=hdfs://hadoop-ha-cluster
dfs.client.socket-timeout=600000
ha.zookeeper.quorum=zk1:2181,zk2:2181,zk3:2181,zk4:2181,zk5:2181
ha.zookeeper.session-timeout.ms=300000
dfs.nameservices=hadoop-ha-cluster
dfs.ha.namenodes.hadoop-ha-cluster=namenode1,namenode2
dfs.namenode.rpc-address.hadoop-ha-cluster.namenode1=hadoop31:9000
dfs.namenode.http-address.hadoop-ha-cluster.namenode1=hadoop31:50070
dfs.namenode.rpc-address.hadoop-ha-cluster.namenode2=hadoop32:9000
dfs.namenode.http-address.hadoop-ha-cluster.namenode2=hadoop32:50070
dfs.client.failover.proxy.provider.hadoop-ha-cluster=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
hbase.rootdir=hdfs://hadoop-ha-cluster/hbase
hbase.cluster.distributed=true
zookeeper.session.timeout=30000
hbase.hregion.majorcompaction=0
hbase.regionserver.regionSplitLimit=1
dfs.client.socket-timeout=600000
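The HA setup can be wired the same way; a minimal sketch under the same assumptions about element layout and bean ids, placed inside the same <beans> root as above:
<!-- Hadoop HA client configuration carrying the properties listed above -->
<hdp:configuration id="hadoopConfiguration">
    fs.defaultFS=hdfs://hadoop-ha-cluster
    dfs.client.socket-timeout=600000
    ha.zookeeper.quorum=zk1:2181,zk2:2181,zk3:2181,zk4:2181,zk5:2181
    ha.zookeeper.session-timeout.ms=300000
    dfs.nameservices=hadoop-ha-cluster
    dfs.ha.namenodes.hadoop-ha-cluster=namenode1,namenode2
    dfs.namenode.rpc-address.hadoop-ha-cluster.namenode1=hadoop31:9000
    dfs.namenode.http-address.hadoop-ha-cluster.namenode1=hadoop31:50070
    dfs.namenode.rpc-address.hadoop-ha-cluster.namenode2=hadoop32:9000
    dfs.namenode.http-address.hadoop-ha-cluster.namenode2=hadoop32:50070
    dfs.client.failover.proxy.provider.hadoop-ha-cluster=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
</hdp:configuration>

<hdp:file-system id="hadoop-cluster" configuration-ref="hadoopConfiguration"/>

<!-- HBase client configuration carrying the hbase.* properties listed above -->
<hdp:hbase-configuration id="hbaseConfiguration" configuration-ref="hadoopConfiguration">
    hbase.rootdir=hdfs://hadoop-ha-cluster/hbase
    hbase.cluster.distributed=true
    zookeeper.session.timeout=30000
    hbase.hregion.majorcompaction=0
    hbase.regionserver.regionSplitLimit=1
    dfs.client.socket-timeout=600000
</hdp:hbase-configuration>

<bean id="hbaseTemplate" class="org.springframework.data.hadoop.hbase.HbaseTemplate">
    <property name="configuration" ref="hbaseConfiguration"/>
</bean>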
4. A utility class for obtaining the Spring application context in a J2EE project
1) Make sure the Spring listener is configured in web.xml, as follows:
<context-param>
    <param-name>contextConfigLocation</param-name>
    <param-value>classpath*:/applicationContext.xml</param-value>
</context-param>
<listener>
    <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
</listener>
2) The utility class SpringContextHolder
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
/**
 * Holds the Spring ApplicationContext in a static variable so that it can be
 * retrieved from anywhere in the code at any time.
 *
 * @author calvin
 */
public class SpringContextHolder implements ApplicationContextAware, DisposableBean {
    private static ApplicationContext applicationContext = null;
    private static Logger logger = LoggerFactory.getLogger(SpringContextHolder.class);

    /**
     * Implements ApplicationContextAware: injects the context into the static variable.
     */
    public void setApplicationContext(ApplicationContext applicationContext) {
        logger.debug("Injecting ApplicationContext into SpringContextHolder: " + applicationContext);
        if (SpringContextHolder.applicationContext != null) {
            logger.warn("The ApplicationContext in SpringContextHolder is being overwritten; the previous one was: "
                    + SpringContextHolder.applicationContext);
        }
        SpringContextHolder.applicationContext = applicationContext; //NOSONAR
    }

    /**
     * Implements DisposableBean: clears the static variable when the context is closed.
     */
    public void destroy() throws Exception {
        SpringContextHolder.clear();
    }

    /**
     * Returns the ApplicationContext stored in the static variable.
     */
    public static ApplicationContext getApplicationContext() {
        assertContextInjected();
        return applicationContext;
    }

    /**
     * Retrieves a bean by name from the static applicationContext, cast to the expected type.
     */
    @SuppressWarnings("unchecked")
    public static <T> T getBean(String name) {
        assertContextInjected();
        return (T) applicationContext.getBean(name);
    }

    /**
     * Retrieves a bean by type from the static applicationContext.
     */
    public static <T> T getBean(Class<T> requiredType) {
        assertContextInjected();
        return applicationContext.getBean(requiredType);
    }

    /**
     * Resets the ApplicationContext held by SpringContextHolder to null.
     */
    public static void clear() {
        logger.debug("Clearing the ApplicationContext in SpringContextHolder: " + applicationContext);
        applicationContext = null;
    }

    /**
     * Asserts that the ApplicationContext has been injected.
     */
    private static void assertContextInjected() {
        if (applicationContext == null) {
            throw new IllegalStateException("applicationContext has not been injected; define SpringContextHolder in applicationContext.xml");
        }
    }
}
3) The utility class must be declared in the Spring configuration file.
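A minimal registration sketch for applicationContext.xml; the package com.besttone.spring is taken from the import in the FileSystemUtil class below:
<!-- expose the Spring ApplicationContext through the static holder -->
<bean class="com.besttone.spring.SpringContextHolder" lazy-init="false"/>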
5. Using HDFS in a J2EE project
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import com.besttone.spring.SpringContextHolder;
public class FileSystemUtil {
    // the "hadoop-cluster" FileSystem bean is defined in applicationContext.xml
    private static FileSystem fs = (FileSystem) SpringContextHolder.getBean("hadoop-cluster");

    public void mkdirs() throws Exception { // create an HDFS folder
        Path path = new Path("/test");
        fs.mkdirs(path);
    }

    public void create() throws Exception { // create a file and write to it
        Path path = new Path("/test/a.txt");
        FSDataOutputStream out = fs.create(path);
        out.write("hello hadoop".getBytes());
        out.close(); // close the stream so the data is flushed to HDFS
    }

    public void rename() throws Exception { // rename a file
        Path path = new Path("/test/a.txt");
        Path newPath = new Path("/test/b.txt");
        System.out.println(fs.rename(path, newPath));
    }

    public void copyFromLocalFile() throws Exception { // upload a local file
        Path src = new Path("/home/hadoop/hadoop-1.2.1/bin/rcc");
        Path dst = new Path("/test");
        fs.copyFromLocalFile(src, dst);
    }

    public void uploadLocalFile2() throws Exception { // upload a local file through a stream
        InputStream in = new BufferedInputStream(new FileInputStream(new File(
                "/home/hadoop/hadoop-1.2.1/bin/rcc")));
        FSDataOutputStream out = fs.create(new Path("/test/rcc1"));
        IOUtils.copyBytes(in, out, 4096, true); // the last argument closes both streams when the copy finishes
    }

    public void listFiles() throws Exception { // list files under a folder
        Path dst = new Path("/test");
        FileStatus[] files = fs.listStatus(dst);
        for (FileStatus file : files) {
            System.out.println(file.getPath().toString());
        }
    }

    public void getBlockInfo() throws Exception { // list the block locations of a file
        Path dst = new Path("/test/rcc");
        FileStatus fileStatus = fs.getFileStatus(dst);
        BlockLocation[] blkloc = fs.getFileBlockLocations(fileStatus, 0,
                fileStatus.getLen());
        for (BlockLocation loc : blkloc) {
            for (int i = 0; i < loc.getHosts().length; i++) {
                System.out.println(loc.getHosts()[i]);
            }
        }
    }
}
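A hypothetical usage example: once the web container has started the Spring context, any servlet or service-layer code can call the utility methods directly (the methods declare throws Exception, so the caller must handle or propagate it):
FileSystemUtil fsUtil = new FileSystemUtil();
fsUtil.mkdirs();    // create /test
fsUtil.create();    // write /test/a.txt
fsUtil.listFiles(); // print the paths under /test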
6. Using HBase in a J2EE project
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.hadoop.hbase.HbaseTemplate;
import org.springframework.data.hadoop.hbase.RowMapper;
import org.springframework.data.hadoop.hbase.TableCallback;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSON;
@Component
public class HbaseService {
private static final Logger logger = Logger.getLogger(HbaseService.class);
private static int FETCH_HBASE_SIZE=15000;
@Autowired
HbaseTemplate hbaseTemplate;
/**
 * Fetch a single row by table name and row key.
 *
 * @param tableName
 * @param rowKey
 * @return a map of column qualifier -> value for the row
 */
public Map<String, Object> get(String tableName, String rowKey) {
    // assumed RowMapper implementation: copy each cell's qualifier/value into a map
    return hbaseTemplate.get(tableName, rowKey, new RowMapper<Map<String, Object>>() {
        public Map<String, Object> mapRow(Result result, int rowNum) throws Exception {
            Map<String, Object> row = new HashMap<String, Object>();
            for (Cell cell : result.rawCells()) {
                row.put(Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()),
                        Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
            }
            return row;
        }
    });
}