利用MapReduce计算平均数

利用mapreduce求出股票价格的开盘和收盘平均数

下图为采集到的股票信息,共计1416支股票

利用MapReduce计算平均数_第1张图片

因为在linux系统下默认采用utf-8的编码格式,而在win下txt默认采用ANSI编码格式。所以需要在linux下将文件转换一下格式,可以采用:

递归 转换 (包括子文件夹)
find default -type d -exec mkdir -p utf/{} \;
find default -type f -exec iconv -f  GBK  -t  UTF-8  {} -o utf/{} \;

这两行命令将default目录下的文件由GBK编码转换为UTF-8编码,目录结构不变,转码后的文件保存在utf/default目录下。
package economic;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class ScoreAvgTest {
	/**
	 * 
	 * @author hadoop KEYIN:输入map的key值,为每行文本的开始位置子字节计算,(0,11...)
	 *         VALUEIN:输入map的value,为每行文本值 KEYOUT :输出的key值 VALUEOUT:输出的value值
	 */
	public static class MapperClass extends Mapper {
		private Text companyName = new Text();
		private Text open = new Text();
		
		private Text data=new Text();
		private int n = 0;

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			// TODO Auto-generated method stub

			System.out.println(this.n);
			n++;

			String lineText = value.toString();
			String[] args = lineText.split("\\s+");

			if (args.length == 4) {
				this.companyName.set(args[1]);
			}
			if (args.length == 7) {
				try {
					System.out.println("Bfter Reducer:" + companyName + ","
							+ args[1]);
					data.set(args[1]+" "+args[4]);
					context.write(this.companyName, data);
				} catch (IOException e) {
					e.printStackTrace();
				} catch (InterruptedException e) {
					e.printStackTrace();
				}
			}

		}

	}

	/**
	 * 
	 * @author hadoop KEYIN:输入的名字 VALUEIN:输入的分数 KEYOUT:输出的名字 VALUEOUT:统计输出的平均分
	 */
	public static class ReducerClass extends Reducer {

		private Text text = new Text();

		protected void reduce(Text companyName, Iterable kaipan,
				Context context) throws IOException, InterruptedException {
			// TODO Auto-generated method stub

			double sumOpen = 0.0;
			double sumClose = 0.0;
			int num = 0;

			Iterator $it = kaipan.iterator();
			while ($it.hasNext()) {
				String record = $it.next().toString();
				String[] getData=record.split(" ");
				System.out.println(num);
				System.out.println("原始数据:" + record);
				num++;
				System.out.println("第" + num + "次循环");
				sumOpen += (Double.valueOf(getData[0])*100);
				sumClose+=(Double.valueOf(getData[1])*100);
			}
			double openPrise = sumOpen / (100 * num);
			double closePrise = sumClose / (100 * num);
			System.out.println("openPrice1:" + openPrise);
			System.out.println("closePrice1:" + closePrise);
			openPrise = (double) Math.round(openPrise * 100) / 100;
			closePrise = (double) Math.round(closePrise * 100) / 100;
			System.out.println("sumOpen:" + sumOpen+"   sumClose"+sumClose);
			System.out.println("openPrice2:" + openPrise);
			System.out.println("closePrice2:" + closePrise);
			String result ="开盘平均价:"+Double.toString(openPrise)+",   收盘平均价:"+Double.toString(closePrise);  
			text.set(result);
			try {
				context.write(companyName, text);
			} catch (IOException e) {
				e.printStackTrace();
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
		}

	}

	public static void main(String[] args) throws IOException,
			InterruptedException, ClassNotFoundException {

		Configuration conf = new Configuration();
		conf.set("fs.default.name", "hdfs://localhost:9000");
		String[] otherArgs = new String[] { "export", "output" };    //export为文本输入路径,output为输出路径
		if (otherArgs.length < 2) {
			System.err.println("Usage:wordcount[...]");
			System.exit(2);
		}
		Job job = Job.getInstance(conf, "arg");
		job.setJarByClass(ScoreAvgTest.class);
		job.setMapperClass(MapperClass.class);
//		job.setCombinerClass(ReducerClass.class);
		System.out.println("Mapper over");
		job.setReducerClass(ReducerClass.class);
		System.out.println("Reducer over");
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);

	}
}

运行后生成的output文件夹中的文件


查看

利用MapReduce计算平均数_第2张图片

需要股票信息的可以留言

你可能感兴趣的:(hadoop)