自己写Hadoop的DBOutputFormat 类

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Custom {@link OutputFormat} that sends MapReduce output to a MySQL database
 * through {@link TestRecordWriter} instead of writing files.
 *
 * <p>Project: aggressiondatafilter. Author: Huang Chuancong (黄传聪).
 * Created: 2013-12-09 22:21:35.
 *
 * @param <K> output key type; must implement {@link DBWritable}
 * @param <V> output value type
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Test<K extends DBWritable, V> extends OutputFormat<K, V> {

	// NOTE(review): the connection settings (including the password) are
	// hard-coded in source. They should come from the job configuration or a
	// credential store rather than being committed with the code.
	private static final String DB_URL = "jdbc:mysql://10.5.82.224:3306/ale";
	private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";
	private static final String USER_NAME = "test";
	private static final String PASSWORD = "hcc199056";

	/**
	 * Performs no validation: this format has no output specification to check.
	 *
	 * @param context the job being submitted (unused)
	 */
	@Override
	public void checkOutputSpecs(JobContext context) throws IOException,
			InterruptedException {
		// Intentionally empty.
	}

	/**
	 * Returns a {@link FileOutputCommitter} built from the job's file output
	 * path, as reported by {@link FileOutputFormat#getOutputPath}.
	 *
	 * @param context the task attempt requesting a committer
	 * @return a file-based committer for this task attempt
	 */
	@Override
	public OutputCommitter getOutputCommitter(TaskAttemptContext context)
			throws IOException, InterruptedException {
		return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
				context);
	}

	/**
	 * Creates the writer that inserts records into the database.
	 *
	 * @param context the task attempt requesting a writer (unused)
	 * @return a new {@link TestRecordWriter}; never {@code null}
	 * @throws IOException if the JDBC driver class cannot be loaded or the
	 *         writer cannot be created; the original exception is the cause
	 */
	@Override
	public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
			throws IOException, InterruptedException {
		try {
			return new TestRecordWriter<K, V>(DRIVER_CLASS, DB_URL, USER_NAME, PASSWORD);
		} catch (SQLException | ClassNotFoundException e) {
			// Fail the task here with a clear cause instead of returning null,
			// which would only surface later as a NullPointerException.
			throw new IOException("Unable to create record writer for " + DB_URL, e);
		}
	}

}

和重写其他的 Format 类型一样,自定义输出格式需要继承 OutputFormat 基类。
每一个 Format 都有一个对应的 RecordWriter,下面是本例中 RecordWriter 的实现:
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * {@link RecordWriter} that writes to the database: each {@link #write} call
 * inserts one row into {@code t_aggression_event}, one into
 * {@code t_aggression_event_subscribe} and one into
 * {@code t_aggression_event_epc} inside a single transaction.
 *
 * <p>Project: aggressiondatafilter. Author: Huang Chuancong (黄传聪).
 * Created: 2013-12-09 22:27:56.
 *
 * @param <K> output key type
 * @param <V> output value type
 */
@InterfaceStability.Evolving
public class TestRecordWriter<K, V> extends RecordWriter<K, V>{

	private static final String INSERT_EVENT_SQL =
			"insert into t_aggression_event (parent_id, action, biz_step, disposion, biz_location,read_point,biz_transtration,flag) values (?,?,?,?,?,?,?,?)";
	private static final String INSERT_SUBSCRIBE_SQL =
			"insert into t_aggression_event_subscribe (event_id) values (?)";
	private static final String INSERT_EPC_SQL =
			"insert into t_aggression_event_epc (epc, read_point, event_id, base_reader_name) values (?, ?, ?, ?)";

	private final String url;
	private final String userName;
	private final String password;

	/**
	 * Loads the JDBC driver and remembers the connection settings. No
	 * connection is opened here; one is opened per {@link #write} call.
	 *
	 * @param driveClass fully qualified name of the JDBC driver class
	 * @param url JDBC connection URL
	 * @param userName database user
	 * @param password database password
	 * @throws SQLException declared for callers; not thrown by this constructor
	 * @throws ClassNotFoundException if the driver class cannot be loaded
	 */
	public TestRecordWriter(String driveClass, String url, String userName, String password) throws SQLException, ClassNotFoundException {
		Class.forName(driveClass);
		this.url = url;
		this.userName = userName;
		this.password = password;
	}


	/**
	 * Does nothing: every {@link #write} call closes the resources it opens.
	 *
	 * @param context the task attempt being closed (unused)
	 */
	@Override
	public void close(TaskAttemptContext context) throws IOException,
			InterruptedException {
		// Nothing to release.
	}

	/**
	 * Inserts one event, its subscription row and its EPC row in one
	 * transaction, committing on success and rolling back on failure.
	 *
	 * <p>TODO: {@code key} and {@code value} are currently ignored and
	 * placeholder values are inserted. The original draft bound fields of
	 * {@code StoreEPCData} (key) and {@code BussinessInfo} (value) instead.
	 *
	 * @param key output key (currently unused)
	 * @param value output value (currently unused)
	 * @throws IOException if the connection cannot be opened or any statement
	 *         fails; the {@link SQLException} is attached as the cause
	 */
	@Override
	public void write(K key, V value) throws IOException, InterruptedException {
		// One connection per record, as before; try-with-resources guarantees
		// it is closed on every path.
		try (Connection conn = DriverManager.getConnection(url, userName, password)) {
			conn.setAutoCommit(false);
			try {
				long eventId = insertEvent(conn);
				insertSubscription(conn, eventId);
				insertEventEpc(conn, eventId);
				conn.commit();
			} catch (SQLException ex) {
				rollback(conn, ex);
				throw ex;
			}
		} catch (SQLException ex) {
			// Surface the failure to the framework rather than silently
			// dropping the record.
			throw new IOException("Failed to write event record to " + url, ex);
		}
	}

	/** Inserts the event row and returns its database-generated id. */
	private static long insertEvent(Connection conn) throws SQLException {
		long eventId = -1;
		try (PreparedStatement ps = conn.prepareStatement(INSERT_EVENT_SQL,
				Statement.RETURN_GENERATED_KEYS)) {
			ps.setString(1, "1");
			ps.setString(2, "1");
			ps.setInt(3, 1);
			ps.setInt(4, 1);
			ps.setInt(5, 1);
			ps.setString(6, "1");
			ps.setInt(7, 1);
			ps.setBoolean(8, false);
			ps.executeUpdate();
			try (ResultSet keys = ps.getGeneratedKeys()) {
				if (keys.next()) {
					eventId = keys.getLong(1);
				}
			}
		}
		if (eventId < 0) {
			throw new SQLException("事件Id未获取到,集合事件信息未插入");
		}
		return eventId;
	}

	/** Inserts the subscription-report row that references the event. */
	private static void insertSubscription(Connection conn, long eventId) throws SQLException {
		try (PreparedStatement ps = conn.prepareStatement(INSERT_SUBSCRIBE_SQL)) {
			ps.setLong(1, eventId);
			ps.executeUpdate();
		}
	}

	/** Inserts the event's EPC (tag) row, linked to the event by its generated id. */
	private static void insertEventEpc(Connection conn, long eventId) throws SQLException {
		try (PreparedStatement ps = conn.prepareStatement(INSERT_EPC_SQL)) {
			ps.setString(1, "1");
			ps.setString(2, "1");
			// Use the generated id rather than a literal so the row points at
			// the event inserted in this transaction.
			ps.setLong(3, eventId);
			ps.setString(4, "1");
			ps.executeUpdate();
		}
	}

	/** Rolls back the transaction, attaching any rollback failure to {@code cause}. */
	private static void rollback(Connection conn, SQLException cause) {
		try {
			conn.rollback();
		} catch (SQLException rollbackFailure) {
			cause.addSuppressed(rollbackFailure);
		}
	}

}



查看 DBOutputFormat 的源码可以发现,它同样是在 RecordWriter 中完成对数据库的写操作。
在 Main 函数中调用 job.setOutputFormatClass(Test.class); 就可以了(因为我的数据库连接地址、用户名和密码都是写死在代码里的,所以不需要额外的配置)。



你可能感兴趣的:(自己写Hadoop的DBOutputFormat 类)