自定义OutputFormat

自定义一个 OutputFormat,用于输出 <Text, MapWritable> 格式(key 为 Text,value 为 MapWritable)的数据。

MapWritable 中的每一项都是 Text → LongWritable 的键值对(键为 url,值为对应的次数)。

输出格式(每行一条记录,url 之后是一个制表符,各项之间没有空格):url\turl2:times2,url3:times3,...

 

实现上参考了 Hadoop 自带的 TextOutputFormat,并在其基础上做了修改和简化。

 

/**
 * Output format that writes {@code <Text, MapWritable>} records as plain
 * UTF-8 text, one record per line.
 *
 * <p>Each line has the shape
 * {@code key<TAB>k1:v1,k2:v2,...,} followed by a newline, where every map
 * entry is expected to be a {@code Text} key with a {@code LongWritable}
 * value. A comma is written after every entry, including the last one.
 */
public class TextAndMapWritableOutputFormat extends
		FileOutputFormat<Text, MapWritable> {

	/**
	 * Creates the task output file and wraps it in a record writer.
	 *
	 * @param ignored  not used; the file system is resolved from the output path
	 * @param job      job configuration used to resolve the task output path
	 * @param name     name of the output file for this task
	 * @param progress progress callback handed to the file system on create
	 * @return a writer that emits one text line per record
	 * @throws IOException if the output file cannot be created
	 */
	@Override
	public RecordWriter<Text, MapWritable> getRecordWriter(FileSystem ignored,
			JobConf job, String name, Progressable progress) throws IOException {
		Path file = FileOutputFormat.getTaskOutputPath(job, name);
		FileSystem fs = file.getFileSystem(job);
		FSDataOutputStream fileOut = fs.create(file, progress);

		return new TextAndMapWritableRecordWriter(fileOut);
	}

	/**
	 * Record writer that serializes a {@code Text} key and the entries of a
	 * {@code MapWritable} onto a single line of the underlying stream.
	 */
	protected static class TextAndMapWritableRecordWriter implements
			RecordWriter<Text, MapWritable> {

		private static final String utf8 = "UTF-8";
		private static final byte[] newline;
		private static final byte[] keyValueSeparator;
		private static final byte[] colon;
		private static final byte[] comma;
		static {
			try {
				newline = "\n".getBytes(utf8);
				keyValueSeparator = "\t".getBytes(utf8);
				colon = ":".getBytes(utf8);
				comma = ",".getBytes(utf8);
			} catch (UnsupportedEncodingException uee) {
				// Keep the original exception as the cause so the failure is traceable.
				throw new IllegalArgumentException("can't find " + utf8
						+ " encoding", uee);
			}
		}

		protected DataOutputStream out;

		/**
		 * @param out stream that receives the formatted records; closed by
		 *            {@link #close(Reporter)}
		 */
		public TextAndMapWritableRecordWriter(DataOutputStream out) {
			this.out = out;
		}

		/**
		 * Writes one record as {@code key<TAB>k1:v1,k2:v2,...,<NEWLINE>}.
		 *
		 * @param key   record key, written first
		 * @param value map whose entries are cast to {@code Text} keys and
		 *              {@code LongWritable} values; written in the map's
		 *              iteration order
		 * @throws IOException        if writing to the stream fails
		 * @throws ClassCastException if an entry is not {@code Text} to
		 *                            {@code LongWritable}
		 */
		@Override
		public synchronized void write(Text key, MapWritable value)
				throws IOException {
			// Text.getBytes() exposes the backing buffer, which may be longer
			// than the logical content, so always bound writes by getLength().
			out.write(key.getBytes(), 0, key.getLength());
			out.write(keyValueSeparator);

			for (java.util.Map.Entry<Writable, Writable> entry : value.entrySet()) {
				Text entryKey = (Text) entry.getKey();
				LongWritable entryValue = (LongWritable) entry.getValue();

				out.write(entryKey.getBytes(), 0, entryKey.getLength());
				out.write(colon);
				out.write(entryValue.toString().getBytes(utf8));
				// A comma follows every entry (the last one too); kept so the
				// on-disk format stays the same for existing consumers.
				out.write(comma);
			}

			out.write(newline);
		}

		/**
		 * Closes the underlying output stream.
		 *
		 * @param reporter not used
		 * @throws IOException if closing the stream fails
		 */
		@Override
		public synchronized void close(Reporter reporter) throws IOException {
			out.close();
		}

	}

}

你可能感兴趣的:(hadoop)