package kmeans;
import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.google.gson.Gson;
/***
* KMeans算法的MapReduce实现
* @author chenjie
*/
public class KMeans extends Configured implements Tool {
/**
* 要聚类的簇数量
*/
public static int K = 3;
/***
* 迭代次数
*/
public static int REPEAT = 10;
/***
* 标记是否是第一次迭代(第一次从输入文件里随机选择聚类中心;其他次则从上一次的输出文件读取聚类中心)
*/
public static boolean firstTime = true;
/**
* 输入文件名
*/
public static String FILE = "/media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt";
/***
* 输出文件夹
*/
public static String REDUCE_OUTPUT_DIR = "/media/chenjie/0009418200012FF3/ubuntu/kmeans/";
/***
* 输出文件
*/
public static String REDUCE_OUTPUT = REDUCE_OUTPUT_DIR + "part-r-00000";
/***
* 缓存的簇中心集合
*/
public static List> cachedCenters = new ArrayList>();
/***
* 从文件中读取簇中心向量集合
* @param path 文件路径
* @param K 中心点个数
* @return 从文件中读取簇中心向量集合
*/
private static List> readRandomCenterFromInputFile(String path,int K)
{
List> list = new ArrayList>();
try{
BufferedReader br = new BufferedReader(new FileReader(path));//构造一个BufferedReader类来读取文件
String s = null;
int count = 0;//记录已经读取到的点的个数
while((s = br.readLine())!=null && count < K){//使用readLine方法,一次读一行
System.out.println("readRandomCenterFromInputFile读取一行:" + s);
count ++;
String tokens[] = s.split(" ");//输入文件中,点的分量坐标以空格隔开
ArrayList vector = new ArrayList();//点的分量集合中
for(String token : tokens)
{
vector.add(Double.valueOf(token));//将点的各个分量坐标存到点的分量集合中
}
list.add(vector);//将点添加到点集合
}
br.close();
}catch(Exception e){
e.printStackTrace();
return list;
}
return list;
}
/***
* 映射器,将文本文件作为输入。
* 写出将由规约器处理的键值对,其中键是离输入点最近的簇中心,值是一个d维向量。键和值都用自定义类型ListWritable表示
* @author chenjie
*/
public static class KMeansMapper extends Mapper
{
/***
* 在map之前调用,从文件中读取簇中心向量集合从而加载到内存中
*/
@Override
protected void setup( Mapper.Context context)throws IOException, InterruptedException
{
super.setup(context);
if(firstTime)//如果是第一次迭代
{
KMeans.cachedCenters = readRandomCenterFromInputFile(FILE,K);//从输入文件中得到随机K个点
firstTime = false;//不再是第一次迭代
}
System.out.println("----------setup------------");
System.out.println("----------centers------------");
for(ArrayList vector : cachedCenters)
{
System.out.println(vector);//输出各个点的坐标
}
}
/***
* key为行号,value为每一行的内容,即每一个点的坐标。context为hadoop上下文
*/
@Override
protected void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException
{
System.out.println("map value=" + value.toString());
ArrayList valueVector = getVectorFromString(value.toString());//得到这行对应的这个点的坐标
System.out.println("valueVector=" + valueVector.toString());
ArrayList nearest = null;//保存与输入点有最小距离的簇中心的坐标
double nearestDistance = Double.MAX_VALUE;//保存这个点到各个簇中心的最近距离
for(ArrayList center : cachedCenters)//对于每个簇中心
{
double distance = calculateDistance(center,valueVector);//计算这个点到这个簇中心的距离
if(nearest == null)//如果之前没有与输入点有最小距离的簇中心,则这个簇中心是目前与输入点有最小距离的簇中心
{
nearest = center;//更新与输入点有最小距离的簇中心
nearestDistance = distance;//更新这个点到各个簇中心的最近距离
}
else//如果之前有与输入点有最小距离的簇中心,则将[这个点到这个簇中心的距离]与[这个点到各个簇中心的最近距离]进行比较
{
if(distance < nearestDistance )//[这个点到这个簇中心的距离]比[这个点到各个簇中心的最近距离]还要小,则说明发现新的簇中心,要更新
{
nearest = center;
nearestDistance = distance;
}
}
}
if(nearest != null)//与输入点有最小距离的簇中心存在,则将其输出给combine处理
{
List nearestWritableList = new ArrayList();
//由于List不能作为MapReduce的键、值类型,因此要自定义一个List类型
for(Double d : nearest)
{
nearestWritableList.add(new DoubleWritable(d));//讲簇中心的各个分量进行DoubleWritable包装
}
ListWritable outputkey = new ListWritable(nearestWritableList);
List valueWritableList = new ArrayList();
for(Double d : valueVector)
{
valueWritableList.add(new DoubleWritable(d));
}
ListWritable outputvalue = new ListWritable(valueWritableList);
System.out.println("map 生成:" + outputkey + "," + outputvalue);
context.write(outputkey, outputvalue);
}
}
/**
*
* @param vector1 向量1:(X1,X2,...)
* @param vector2 向量2:(Y1,Y2,...)
* @return 计算两个向量的欧几里德距离:d=sqrt((X1-Y1)^2+(X2-Y2)^2+...)
*/
private double calculateDistance(ArrayList vector1,
ArrayList vector2) {
double sum = 0.0;
int length = vector1.size();
for(int i=0;i getVectorFromString(String string) {
String tokens[] = string.split(" ");
ArrayList vector = new ArrayList();
for(String value : tokens)
{
vector.add(Double.valueOf(value));
}
return vector;
}
}
/***
* 组合器,组合映射任务的中间数据
* 累加向量各个维的值
* @author chenjie
*/
public static class KMeansCombiner extends Reducer
{
@Override
protected void reduce(ListWritable key,Iterable values,Context context) throws IOException, InterruptedException {
System.out.println("----------------------KMeansCombiner---------------------");
System.out.println("key=" + key);
System.out.println("values:" );
ArrayList sum = new ArrayList();
//sum向量用来保存key值相同的所有value的向量分量之和
//sum0=x0+y0
//sum1=x1+y1
sum.add(0D);
sum.add(0D);
int count = 0;//保存values的长度
for(ListWritable value : values)
{
count ++;
System.out.println("value=" + value);
if(value.get().isEmpty())
continue;
List writables = value.get();
for(int i=0;i sumWritableList = new ArrayList();
for(Double d : sum)
{
sumWritableList.add(new DoubleWritable(d / count));//将各个分量取平均值
}
System.out.println("sumWritableList=" + sumWritableList);
ListWritable outputValue = new ListWritable(sumWritableList);
context.write(key, outputValue);
}
}
public static class KMeansReducer extends Reducer
{
@Override
protected void reduce(ListWritable key,Iterable values,Context context)throws IOException, InterruptedException {
System.out.println("----------------------reduce---------------------");
System.out.println("key=" + key);
System.out.println("values:");
ArrayList newCenter = new ArrayList();//新簇中心坐标分量集合
newCenter.add(0D);//初始化为0
newCenter.add(0D);//初始化为0
int count = 0;
for(ListWritable value : values)
{
System.out.println(value);
count ++;
for(int i=0;i newCenterWritableList = new ArrayList();
for(Double d : newCenter)
{
newCenterWritableList.add(new DoubleWritable(d));
}
ListWritable outputValue = new ListWritable(newCenterWritableList);
System.out.println("reduce生成:" + key + "|" + outputValue);
context.write(outputValue,NullWritable.get() );
}
}
public static void main(String[] args) throws Exception
{
getCJKMeansConf();
args = new String[2];
args[0] = FILE;
args[1] = REDUCE_OUTPUT_DIR;
while(REPEAT > 0)
{
int jobStatus = submitJob(args);
if(jobStatus == 0)
{
KMeans.cachedCenters = readRandomCenterFromInputFile(REDUCE_OUTPUT,K);//每次reduce结束后,将reduce的结果缓存起来
}
REPEAT --;
}
System.out.println("----------------------------------------KMeans聚类结果--------------------------------------");
for(ArrayList point : KMeans.cachedCenters)
{
System.out.println(point);
}
}
public static int submitJob(String[] args) throws Exception {
int jobStatus = ToolRunner.run(new KMeans(), args);
return jobStatus;
}
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = new Job(conf);
job.setJobName("Kmeans");
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
job.setOutputKeyClass(ListWritable.class);
job.setOutputValueClass(ListWritable.class);
job.setMapOutputKeyClass(ListWritable.class);
job.setMapOutputValueClass(ListWritable.class);
job.setMapperClass(KMeansMapper.class);
job.setReducerClass(KMeansReducer.class);
job.setCombinerClass(KMeansCombiner.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
FileSystem fs = FileSystem.get(conf);
Path outPath = new Path(args[1]);
if(fs.exists(outPath))
{
fs.delete(outPath, true);
}
boolean status = job.waitForCompletion(true);
return status ? 0 : 1;
}
/***
* 自定义向量类,可以作为MapReduce的键和值
* @author chenjie
*/
public static class ListWritable implements Writable , WritableComparable{
private Class extends Writable> valueClass;
@SuppressWarnings("rawtypes")
private Class extends List> listClass;
private List values;
public ListWritable() {
}
public ListWritable(List values) {
listClass = values.getClass();
valueClass = values.get(0).getClass();
this.values = values;
}
public Class extends Writable> getValueClass() {
return valueClass;
}
@SuppressWarnings("rawtypes")
public Class extends List> getListClass() {
return listClass;
}
public void set(List values) {
this.values = values;
}
public List get() {
return values;
}
@SuppressWarnings({ "unchecked", "rawtypes" })
public void readFields(DataInput in) throws IOException {
String listClass = in.readUTF();
try {
this.listClass = (Class extends List>) Class.forName(listClass);
String valueClass = in.readUTF();
this.valueClass = (Class extends Writable>) Class
.forName(valueClass);
} catch (ClassNotFoundException e1) {
e1.printStackTrace();
}
int size = in.readInt(); // construct values
try {
values = this.listClass.newInstance();
} catch (InstantiationException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {
e.printStackTrace();
}
for (int i = 0; i < size; i++) {
Writable value = WritableFactories.newInstance(this.valueClass);
value.readFields(in); // read a value
values.add(value); // store it in values
}
}
public void write(DataOutput out) throws IOException {
out.writeUTF(listClass.getName());
out.writeUTF(valueClass.getName());
out.writeInt(values.size()); // write values
Iterator iterator = values.iterator();
while (iterator.hasNext()) {
iterator.next().write(out);
}
}
public int size() {
return values.size();
}
public boolean isEmpty() {
return values==null? true :false;
}
@Override
public int compareTo(ListWritable o) {
int flag = 0;
for(int i=0;i
输入:kmeans_input_file.txt
1.0 2.0
1.0 3.0
1.0 4.0
2.0 5.0
2.0 6.0
2.0 7.0
2.0 8.0
3.0 100.0
3.0 101.0
3.0 102.0
3.0 103.0
3.0 104.0
输出:
2017-11-18 13:40:59,061 INFO [localfetcher#4] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(141)) - localfetcher#4 about to shuffle output of map attempt_local1325636158_0004_m_000000_0 decomp: 476 len: 480 to MEMORY
2017-11-18 13:40:59,061 INFO [localfetcher#4] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 476 bytes from map-output for attempt_local1325636158_0004_m_000000_0
2017-11-18 13:40:59,061 INFO [localfetcher#4] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(315)) - closeInMemoryFile -> map-output of size: 476, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->476
2017-11-18 13:40:59,062 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2017-11-18 13:40:59,062 INFO [pool-13-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:40:59,063 INFO [pool-13-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(687)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-11-18 13:40:59,064 INFO [pool-13-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:40:59,064 INFO [pool-13-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:40:59,064 INFO [pool-13-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(754)) - Merged 1 segments, 476 bytes to disk to satisfy reduce memory limit
2017-11-18 13:40:59,065 INFO [pool-13-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(784)) - Merging 1 files, 480 bytes from disk
2017-11-18 13:40:59,065 INFO [pool-13-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(799)) - Merging 0 segments, 0 bytes from memory into reduce
2017-11-18 13:40:59,065 INFO [pool-13-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:40:59,066 INFO [pool-13-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:40:59,066 INFO [pool-13-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
----------------------reduce---------------------
key=1.0 2.5
values:
1.0 3.0
reduce生成:1.0 2.5|1.0 3.0
----------------------reduce---------------------
key=1.8 6.0
values:
2.0 6.5
reduce生成:1.8 6.0|2.0 6.5
----------------------reduce---------------------
key=3.0 102.0
values:
3.0 102.0
reduce生成:3.0 102.0|3.0 102.0
2017-11-18 13:40:59,073 INFO [pool-13-thread-1] mapred.Task (Task.java:done(1001)) - Task:attempt_local1325636158_0004_r_000000_0 is done. And is in the process of committing
2017-11-18 13:40:59,075 INFO [pool-13-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:40:59,075 INFO [pool-13-thread-1] mapred.Task (Task.java:commit(1162)) - Task attempt_local1325636158_0004_r_000000_0 is allowed to commit now
2017-11-18 13:40:59,077 INFO [pool-13-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local1325636158_0004_r_000000_0' to file:/media/chenjie/0009418200012FF3/ubuntu/kmeans/_temporary/0/task_local1325636158_0004_r_000000
2017-11-18 13:40:59,077 INFO [pool-13-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2017-11-18 13:40:59,078 INFO [pool-13-thread-1] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local1325636158_0004_r_000000_0' done.
2017-11-18 13:40:59,078 INFO [pool-13-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local1325636158_0004_r_000000_0
2017-11-18 13:40:59,078 INFO [Thread-101] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2017-11-18 13:40:59,968 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - Job job_local1325636158_0004 running in uber mode : false
2017-11-18 13:40:59,969 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1374)) - map 100% reduce 100%
2017-11-18 13:40:59,970 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Job job_local1325636158_0004 completed successfully
2017-11-18 13:40:59,979 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1392)) - Counters: 33
File System Counters
FILE: Number of bytes read=9264
FILE: Number of bytes written=2077542
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=12
Map output records=12
Map output bytes=1872
Map output materialized bytes=480
Input split bytes=130
Combine input records=12
Combine output records=3
Reduce input groups=3
Reduce shuffle bytes=480
Reduce input records=3
Reduce output records=3
Spilled Records=6
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=0
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=1292894208
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=106
File Output Format Counters
Bytes Written=38
readRandomCenterFromInputFile读取一行:1.0 3.0
readRandomCenterFromInputFile读取一行:2.0 6.5
readRandomCenterFromInputFile读取一行:3.0 102.0
2017-11-18 13:41:00,004 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(71)) - Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
2017-11-18 13:41:00,011 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-11-18 13:41:00,012 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(281)) - Total input paths to process : 1
2017-11-18 13:41:00,023 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(199)) - number of splits:1
2017-11-18 13:41:00,034 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(288)) - Submitting tokens for job: job_local1998692281_0005
2017-11-18 13:41:00,098 INFO [main] mapreduce.Job (Job.java:submit(1301)) - The url to track the job: http://localhost:8080/
2017-11-18 13:41:00,098 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1346)) - Running job: job_local1998692281_0005
2017-11-18 13:41:00,098 INFO [Thread-128] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2017-11-18 13:41:00,100 INFO [Thread-128] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-11-18 13:41:00,102 INFO [Thread-128] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2017-11-18 13:41:00,102 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local1998692281_0005_m_000000_0
2017-11-18 13:41:00,103 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:00,104 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(753)) - Processing split: file:/media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt:0+106
2017-11-18 13:41:00,167 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1202)) - (EQUATOR) 0 kvi 26214396(104857584)
2017-11-18 13:41:00,167 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(995)) - mapreduce.task.io.sort.mb: 100
2017-11-18 13:41:00,167 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(996)) - soft limit at 83886080
2017-11-18 13:41:00,167 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(997)) - bufstart = 0; bufvoid = 104857600
2017-11-18 13:41:00,167 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - kvstart = 26214396; length = 6553600
2017-11-18 13:41:00,168 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(402)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
----------setup------------
----------centers------------
[1.0, 3.0]
[2.0, 6.5]
[3.0, 102.0]
map value=1.0 2.0
valueVector=[1.0, 2.0]
map 生成:1.0 3.0,1.0 2.0
map value=1.0 3.0
valueVector=[1.0, 3.0]
map 生成:1.0 3.0,1.0 3.0
map value=1.0 4.0
valueVector=[1.0, 4.0]
map 生成:1.0 3.0,1.0 4.0
map value=2.0 5.0
valueVector=[2.0, 5.0]
map 生成:2.0 6.5,2.0 5.0
map value=2.0 6.0
valueVector=[2.0, 6.0]
map 生成:2.0 6.5,2.0 6.0
map value=2.0 7.0
valueVector=[2.0, 7.0]
map 生成:2.0 6.5,2.0 7.0
map value=2.0 8.0
valueVector=[2.0, 8.0]
map 生成:2.0 6.5,2.0 8.0
map value=3.0 100.0
valueVector=[3.0, 100.0]
map 生成:3.0 102.0,3.0 100.0
map value=3.0 101.0
valueVector=[3.0, 101.0]
map 生成:3.0 102.0,3.0 101.0
map value=3.0 102.0
valueVector=[3.0, 102.0]
map 生成:3.0 102.0,3.0 102.0
map value=3.0 103.0
valueVector=[3.0, 103.0]
map 生成:3.0 102.0,3.0 103.0
map value=3.0 104.0
valueVector=[3.0, 104.0]
map 生成:3.0 102.0,3.0 104.0
2017-11-18 13:41:00,171 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2017-11-18 13:41:00,172 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1457)) - Starting flush of map output
2017-11-18 13:41:00,172 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1475)) - Spilling map output
2017-11-18 13:41:00,172 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1476)) - bufstart = 0; bufend = 1872; bufvoid = 104857600
2017-11-18 13:41:00,172 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1478)) - kvstart = 26214396(104857584); kvend = 26214352(104857408); length = 45/6553600
----------------------KMeansCombiner---------------------
key=1.0 3.0
values:
value=1.0 4.0
value=1.0 3.0
value=1.0 2.0
sumWritableList=[1.0, 3.0]
----------------------KMeansCombiner---------------------
key=2.0 6.5
values:
value=2.0 8.0
value=2.0 7.0
value=2.0 6.0
value=2.0 5.0
sumWritableList=[2.0, 6.5]
----------------------KMeansCombiner---------------------
key=3.0 102.0
values:
value=3.0 104.0
value=3.0 103.0
value=3.0 102.0
value=3.0 101.0
value=3.0 100.0
sumWritableList=[3.0, 102.0]
2017-11-18 13:41:00,176 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1660)) - Finished spill 0
2017-11-18 13:41:00,179 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1001)) - Task:attempt_local1998692281_0005_m_000000_0 is done. And is in the process of committing
2017-11-18 13:41:00,184 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2017-11-18 13:41:00,184 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local1998692281_0005_m_000000_0' done.
2017-11-18 13:41:00,184 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local1998692281_0005_m_000000_0
2017-11-18 13:41:00,184 INFO [Thread-128] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2017-11-18 13:41:00,185 INFO [Thread-128] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2017-11-18 13:41:00,185 INFO [pool-16-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local1998692281_0005_r_000000_0
2017-11-18 13:41:00,186 INFO [pool-16-thread-1] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:00,187 INFO [pool-16-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@b033081
2017-11-18 13:41:00,187 INFO [pool-16-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:(197)) - MergerManager: memoryLimit=1283037568, maxSingleShuffleLimit=320759392, mergeThreshold=846804800, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-11-18 13:41:00,188 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1998692281_0005_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-11-18 13:41:00,189 INFO [localfetcher#5] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(141)) - localfetcher#5 about to shuffle output of map attempt_local1998692281_0005_m_000000_0 decomp: 476 len: 480 to MEMORY
2017-11-18 13:41:00,189 INFO [localfetcher#5] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 476 bytes from map-output for attempt_local1998692281_0005_m_000000_0
2017-11-18 13:41:00,190 INFO [localfetcher#5] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(315)) - closeInMemoryFile -> map-output of size: 476, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->476
2017-11-18 13:41:00,190 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2017-11-18 13:41:00,190 INFO [pool-16-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:00,191 INFO [pool-16-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(687)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-11-18 13:41:00,193 INFO [pool-16-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:00,194 INFO [pool-16-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:00,194 INFO [pool-16-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(754)) - Merged 1 segments, 476 bytes to disk to satisfy reduce memory limit
2017-11-18 13:41:00,195 INFO [pool-16-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(784)) - Merging 1 files, 480 bytes from disk
2017-11-18 13:41:00,195 INFO [pool-16-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(799)) - Merging 0 segments, 0 bytes from memory into reduce
2017-11-18 13:41:00,195 INFO [pool-16-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:00,195 INFO [pool-16-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:00,196 INFO [pool-16-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
----------------------reduce---------------------
key=1.0 3.0
values:
1.0 3.0
reduce生成:1.0 3.0|1.0 3.0
----------------------reduce---------------------
key=2.0 6.5
values:
2.0 6.5
reduce生成:2.0 6.5|2.0 6.5
----------------------reduce---------------------
key=3.0 102.0
values:
3.0 102.0
reduce生成:3.0 102.0|3.0 102.0
2017-11-18 13:41:00,203 INFO [pool-16-thread-1] mapred.Task (Task.java:done(1001)) - Task:attempt_local1998692281_0005_r_000000_0 is done. And is in the process of committing
2017-11-18 13:41:00,204 INFO [pool-16-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:00,204 INFO [pool-16-thread-1] mapred.Task (Task.java:commit(1162)) - Task attempt_local1998692281_0005_r_000000_0 is allowed to commit now
2017-11-18 13:41:00,206 INFO [pool-16-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local1998692281_0005_r_000000_0' to file:/media/chenjie/0009418200012FF3/ubuntu/kmeans/_temporary/0/task_local1998692281_0005_r_000000
2017-11-18 13:41:00,207 INFO [pool-16-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2017-11-18 13:41:00,207 INFO [pool-16-thread-1] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local1998692281_0005_r_000000_0' done.
2017-11-18 13:41:00,207 INFO [pool-16-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local1998692281_0005_r_000000_0
2017-11-18 13:41:00,207 INFO [Thread-128] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2017-11-18 13:41:01,099 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - Job job_local1998692281_0005 running in uber mode : false
2017-11-18 13:41:01,099 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1374)) - map 100% reduce 100%
2017-11-18 13:41:01,100 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Job job_local1998692281_0005 completed successfully
2017-11-18 13:41:01,106 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1392)) - Counters: 33
File System Counters
FILE: Number of bytes read=11828
FILE: Number of bytes written=2598434
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=12
Map output records=12
Map output bytes=1872
Map output materialized bytes=480
Input split bytes=130
Combine input records=12
Combine output records=3
Reduce input groups=3
Reduce shuffle bytes=480
Reduce input records=3
Reduce output records=3
Spilled Records=6
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=0
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=1503657984
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=106
File Output Format Counters
Bytes Written=38
readRandomCenterFromInputFile读取一行:1.0 3.0
readRandomCenterFromInputFile读取一行:2.0 6.5
readRandomCenterFromInputFile读取一行:3.0 102.0
2017-11-18 13:41:01,148 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(71)) - Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
2017-11-18 13:41:01,160 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-11-18 13:41:01,164 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(281)) - Total input paths to process : 1
2017-11-18 13:41:01,179 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(199)) - number of splits:1
2017-11-18 13:41:01,195 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(288)) - Submitting tokens for job: job_local2141033359_0006
2017-11-18 13:41:01,278 INFO [main] mapreduce.Job (Job.java:submit(1301)) - The url to track the job: http://localhost:8080/
2017-11-18 13:41:01,278 INFO [Thread-155] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2017-11-18 13:41:01,278 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1346)) - Running job: job_local2141033359_0006
2017-11-18 13:41:01,279 INFO [Thread-155] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-11-18 13:41:01,286 INFO [Thread-155] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2017-11-18 13:41:01,286 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local2141033359_0006_m_000000_0
2017-11-18 13:41:01,288 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:01,288 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(753)) - Processing split: file:/media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt:0+106
2017-11-18 13:41:01,354 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1202)) - (EQUATOR) 0 kvi 26214396(104857584)
2017-11-18 13:41:01,354 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(995)) - mapreduce.task.io.sort.mb: 100
2017-11-18 13:41:01,355 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(996)) - soft limit at 83886080
2017-11-18 13:41:01,355 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(997)) - bufstart = 0; bufvoid = 104857600
2017-11-18 13:41:01,355 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - kvstart = 26214396; length = 6553600
2017-11-18 13:41:01,356 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(402)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
----------setup------------
----------centers------------
[1.0, 3.0]
[2.0, 6.5]
[3.0, 102.0]
map value=1.0 2.0
valueVector=[1.0, 2.0]
map 生成:1.0 3.0,1.0 2.0
map value=1.0 3.0
valueVector=[1.0, 3.0]
map 生成:1.0 3.0,1.0 3.0
map value=1.0 4.0
valueVector=[1.0, 4.0]
map 生成:1.0 3.0,1.0 4.0
map value=2.0 5.0
valueVector=[2.0, 5.0]
map 生成:2.0 6.5,2.0 5.0
map value=2.0 6.0
valueVector=[2.0, 6.0]
map 生成:2.0 6.5,2.0 6.0
map value=2.0 7.0
valueVector=[2.0, 7.0]
map 生成:2.0 6.5,2.0 7.0
map value=2.0 8.0
valueVector=[2.0, 8.0]
map 生成:2.0 6.5,2.0 8.0
map value=3.0 100.0
valueVector=[3.0, 100.0]
map 生成:3.0 102.0,3.0 100.0
map value=3.0 101.0
valueVector=[3.0, 101.0]
map 生成:3.0 102.0,3.0 101.0
map value=3.0 102.0
valueVector=[3.0, 102.0]
map 生成:3.0 102.0,3.0 102.0
map value=3.0 103.0
valueVector=[3.0, 103.0]
map 生成:3.0 102.0,3.0 103.0
map value=3.0 104.0
valueVector=[3.0, 104.0]
map 生成:3.0 102.0,3.0 104.0
2017-11-18 13:41:01,359 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2017-11-18 13:41:01,359 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1457)) - Starting flush of map output
2017-11-18 13:41:01,359 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1475)) - Spilling map output
2017-11-18 13:41:01,359 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1476)) - bufstart = 0; bufend = 1872; bufvoid = 104857600
2017-11-18 13:41:01,360 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1478)) - kvstart = 26214396(104857584); kvend = 26214352(104857408); length = 45/6553600
----------------------KMeansCombiner---------------------
key=1.0 3.0
values:
value=1.0 4.0
value=1.0 3.0
value=1.0 2.0
sumWritableList=[1.0, 3.0]
----------------------KMeansCombiner---------------------
key=2.0 6.5
values:
value=2.0 8.0
value=2.0 7.0
value=2.0 6.0
value=2.0 5.0
sumWritableList=[2.0, 6.5]
----------------------KMeansCombiner---------------------
key=3.0 102.0
values:
value=3.0 104.0
value=3.0 103.0
value=3.0 102.0
value=3.0 101.0
value=3.0 100.0
sumWritableList=[3.0, 102.0]
2017-11-18 13:41:01,363 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1660)) - Finished spill 0
2017-11-18 13:41:01,364 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1001)) - Task:attempt_local2141033359_0006_m_000000_0 is done. And is in the process of committing
2017-11-18 13:41:01,365 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2017-11-18 13:41:01,366 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local2141033359_0006_m_000000_0' done.
2017-11-18 13:41:01,366 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local2141033359_0006_m_000000_0
2017-11-18 13:41:01,366 INFO [Thread-155] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2017-11-18 13:41:01,366 INFO [Thread-155] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2017-11-18 13:41:01,366 INFO [pool-19-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local2141033359_0006_r_000000_0
2017-11-18 13:41:01,368 INFO [pool-19-thread-1] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:01,368 INFO [pool-19-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@693dc0d3
2017-11-18 13:41:01,369 INFO [pool-19-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:(197)) - MergerManager: memoryLimit=1283037568, maxSingleShuffleLimit=320759392, mergeThreshold=846804800, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-11-18 13:41:01,383 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local2141033359_0006_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-11-18 13:41:01,388 INFO [localfetcher#6] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(141)) - localfetcher#6 about to shuffle output of map attempt_local2141033359_0006_m_000000_0 decomp: 476 len: 480 to MEMORY
2017-11-18 13:41:01,388 INFO [localfetcher#6] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 476 bytes from map-output for attempt_local2141033359_0006_m_000000_0
2017-11-18 13:41:01,389 INFO [localfetcher#6] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(315)) - closeInMemoryFile -> map-output of size: 476, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->476
2017-11-18 13:41:01,389 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2017-11-18 13:41:01,390 INFO [pool-19-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:01,390 INFO [pool-19-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(687)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-11-18 13:41:01,391 INFO [pool-19-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:01,391 INFO [pool-19-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:01,391 INFO [pool-19-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(754)) - Merged 1 segments, 476 bytes to disk to satisfy reduce memory limit
2017-11-18 13:41:01,391 INFO [pool-19-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(784)) - Merging 1 files, 480 bytes from disk
2017-11-18 13:41:01,392 INFO [pool-19-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(799)) - Merging 0 segments, 0 bytes from memory into reduce
2017-11-18 13:41:01,392 INFO [pool-19-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:01,392 INFO [pool-19-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:01,392 INFO [pool-19-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
----------------------reduce---------------------
key=1.0 3.0
values:
1.0 3.0
reduce生成:1.0 3.0|1.0 3.0
----------------------reduce---------------------
key=2.0 6.5
values:
2.0 6.5
reduce生成:2.0 6.5|2.0 6.5
----------------------reduce---------------------
key=3.0 102.0
values:
3.0 102.0
reduce生成:3.0 102.0|3.0 102.0
2017-11-18 13:41:01,400 INFO [pool-19-thread-1] mapred.Task (Task.java:done(1001)) - Task:attempt_local2141033359_0006_r_000000_0 is done. And is in the process of committing
2017-11-18 13:41:01,401 INFO [pool-19-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:01,401 INFO [pool-19-thread-1] mapred.Task (Task.java:commit(1162)) - Task attempt_local2141033359_0006_r_000000_0 is allowed to commit now
2017-11-18 13:41:01,402 INFO [pool-19-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local2141033359_0006_r_000000_0' to file:/media/chenjie/0009418200012FF3/ubuntu/kmeans/_temporary/0/task_local2141033359_0006_r_000000
2017-11-18 13:41:01,403 INFO [pool-19-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2017-11-18 13:41:01,403 INFO [pool-19-thread-1] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local2141033359_0006_r_000000_0' done.
2017-11-18 13:41:01,403 INFO [pool-19-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local2141033359_0006_r_000000_0
2017-11-18 13:41:01,403 INFO [Thread-155] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2017-11-18 13:41:02,279 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - Job job_local2141033359_0006 running in uber mode : false
2017-11-18 13:41:02,280 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1374)) - map 100% reduce 100%
2017-11-18 13:41:02,281 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Job job_local2141033359_0006 completed successfully
2017-11-18 13:41:02,288 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1392)) - Counters: 33
File System Counters
FILE: Number of bytes read=14392
FILE: Number of bytes written=3119326
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=12
Map output records=12
Map output bytes=1872
Map output materialized bytes=480
Input split bytes=130
Combine input records=12
Combine output records=3
Reduce input groups=3
Reduce shuffle bytes=480
Reduce input records=3
Reduce output records=3
Spilled Records=6
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=12
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=1714946048
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=106
File Output Format Counters
Bytes Written=38
readRandomCenterFromInputFile读取一行:1.0 3.0
readRandomCenterFromInputFile读取一行:2.0 6.5
readRandomCenterFromInputFile读取一行:3.0 102.0
2017-11-18 13:41:02,316 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(71)) - Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
2017-11-18 13:41:02,324 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-11-18 13:41:02,325 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(281)) - Total input paths to process : 1
2017-11-18 13:41:02,346 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(199)) - number of splits:1
2017-11-18 13:41:02,356 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(288)) - Submitting tokens for job: job_local382131182_0007
2017-11-18 13:41:02,419 INFO [main] mapreduce.Job (Job.java:submit(1301)) - The url to track the job: http://localhost:8080/
2017-11-18 13:41:02,419 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1346)) - Running job: job_local382131182_0007
2017-11-18 13:41:02,419 INFO [Thread-182] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2017-11-18 13:41:02,420 INFO [Thread-182] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-11-18 13:41:02,422 INFO [Thread-182] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2017-11-18 13:41:02,422 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local382131182_0007_m_000000_0
2017-11-18 13:41:02,423 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:02,424 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(753)) - Processing split: file:/media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt:0+106
2017-11-18 13:41:02,491 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1202)) - (EQUATOR) 0 kvi 26214396(104857584)
2017-11-18 13:41:02,491 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(995)) - mapreduce.task.io.sort.mb: 100
2017-11-18 13:41:02,491 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(996)) - soft limit at 83886080
2017-11-18 13:41:02,492 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(997)) - bufstart = 0; bufvoid = 104857600
2017-11-18 13:41:02,492 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - kvstart = 26214396; length = 6553600
2017-11-18 13:41:02,492 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(402)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
----------setup------------
----------centers------------
[1.0, 3.0]
[2.0, 6.5]
[3.0, 102.0]
map value=1.0 2.0
valueVector=[1.0, 2.0]
map 生成:1.0 3.0,1.0 2.0
map value=1.0 3.0
valueVector=[1.0, 3.0]
map 生成:1.0 3.0,1.0 3.0
map value=1.0 4.0
valueVector=[1.0, 4.0]
map 生成:1.0 3.0,1.0 4.0
map value=2.0 5.0
valueVector=[2.0, 5.0]
map 生成:2.0 6.5,2.0 5.0
map value=2.0 6.0
valueVector=[2.0, 6.0]
map 生成:2.0 6.5,2.0 6.0
map value=2.0 7.0
valueVector=[2.0, 7.0]
map 生成:2.0 6.5,2.0 7.0
map value=2.0 8.0
valueVector=[2.0, 8.0]
map 生成:2.0 6.5,2.0 8.0
map value=3.0 100.0
valueVector=[3.0, 100.0]
map 生成:3.0 102.0,3.0 100.0
map value=3.0 101.0
valueVector=[3.0, 101.0]
map 生成:3.0 102.0,3.0 101.0
map value=3.0 102.0
valueVector=[3.0, 102.0]
map 生成:3.0 102.0,3.0 102.0
map value=3.0 103.0
valueVector=[3.0, 103.0]
map 生成:3.0 102.0,3.0 103.0
map value=3.0 104.0
valueVector=[3.0, 104.0]
map 生成:3.0 102.0,3.0 104.0
2017-11-18 13:41:02,495 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2017-11-18 13:41:02,495 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1457)) - Starting flush of map output
2017-11-18 13:41:02,495 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1475)) - Spilling map output
2017-11-18 13:41:02,495 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1476)) - bufstart = 0; bufend = 1872; bufvoid = 104857600
2017-11-18 13:41:02,495 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1478)) - kvstart = 26214396(104857584); kvend = 26214352(104857408); length = 45/6553600
----------------------KMeansCombiner---------------------
key=1.0 3.0
values:
value=1.0 4.0
value=1.0 3.0
value=1.0 2.0
sumWritableList=[1.0, 3.0]
----------------------KMeansCombiner---------------------
key=2.0 6.5
values:
value=2.0 8.0
value=2.0 7.0
value=2.0 6.0
value=2.0 5.0
sumWritableList=[2.0, 6.5]
----------------------KMeansCombiner---------------------
key=3.0 102.0
values:
value=3.0 104.0
value=3.0 103.0
value=3.0 102.0
value=3.0 101.0
value=3.0 100.0
sumWritableList=[3.0, 102.0]
2017-11-18 13:41:02,500 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1660)) - Finished spill 0
2017-11-18 13:41:02,500 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1001)) - Task:attempt_local382131182_0007_m_000000_0 is done. And is in the process of committing
2017-11-18 13:41:02,502 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2017-11-18 13:41:02,502 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local382131182_0007_m_000000_0' done.
2017-11-18 13:41:02,502 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local382131182_0007_m_000000_0
2017-11-18 13:41:02,502 INFO [Thread-182] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2017-11-18 13:41:02,503 INFO [Thread-182] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2017-11-18 13:41:02,503 INFO [pool-22-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local382131182_0007_r_000000_0
2017-11-18 13:41:02,504 INFO [pool-22-thread-1] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:02,504 INFO [pool-22-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@2c238249
2017-11-18 13:41:02,504 INFO [pool-22-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:(197)) - MergerManager: memoryLimit=1283037568, maxSingleShuffleLimit=320759392, mergeThreshold=846804800, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-11-18 13:41:02,505 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local382131182_0007_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-11-18 13:41:02,508 INFO [localfetcher#7] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(141)) - localfetcher#7 about to shuffle output of map attempt_local382131182_0007_m_000000_0 decomp: 476 len: 480 to MEMORY
2017-11-18 13:41:02,509 INFO [localfetcher#7] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 476 bytes from map-output for attempt_local382131182_0007_m_000000_0
2017-11-18 13:41:02,509 INFO [localfetcher#7] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(315)) - closeInMemoryFile -> map-output of size: 476, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->476
2017-11-18 13:41:02,512 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2017-11-18 13:41:02,512 INFO [pool-22-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:02,512 INFO [pool-22-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(687)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-11-18 13:41:02,514 INFO [pool-22-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:02,514 INFO [pool-22-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:02,514 INFO [pool-22-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(754)) - Merged 1 segments, 476 bytes to disk to satisfy reduce memory limit
2017-11-18 13:41:02,515 INFO [pool-22-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(784)) - Merging 1 files, 480 bytes from disk
2017-11-18 13:41:02,515 INFO [pool-22-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(799)) - Merging 0 segments, 0 bytes from memory into reduce
2017-11-18 13:41:02,515 INFO [pool-22-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:02,515 INFO [pool-22-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:02,516 INFO [pool-22-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
----------------------reduce---------------------
key=1.0 3.0
values:
1.0 3.0
reduce生成:1.0 3.0|1.0 3.0
----------------------reduce---------------------
key=2.0 6.5
values:
2.0 6.5
reduce生成:2.0 6.5|2.0 6.5
----------------------reduce---------------------
key=3.0 102.0
values:
3.0 102.0
reduce生成:3.0 102.0|3.0 102.0
2017-11-18 13:41:02,523 INFO [pool-22-thread-1] mapred.Task (Task.java:done(1001)) - Task:attempt_local382131182_0007_r_000000_0 is done. And is in the process of committing
2017-11-18 13:41:02,524 INFO [pool-22-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:02,524 INFO [pool-22-thread-1] mapred.Task (Task.java:commit(1162)) - Task attempt_local382131182_0007_r_000000_0 is allowed to commit now
2017-11-18 13:41:02,525 INFO [pool-22-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local382131182_0007_r_000000_0' to file:/media/chenjie/0009418200012FF3/ubuntu/kmeans/_temporary/0/task_local382131182_0007_r_000000
2017-11-18 13:41:02,526 INFO [pool-22-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2017-11-18 13:41:02,526 INFO [pool-22-thread-1] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local382131182_0007_r_000000_0' done.
2017-11-18 13:41:02,526 INFO [pool-22-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local382131182_0007_r_000000_0
2017-11-18 13:41:02,526 INFO [Thread-182] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2017-11-18 13:41:03,420 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - Job job_local382131182_0007 running in uber mode : false
2017-11-18 13:41:03,420 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1374)) - map 100% reduce 100%
2017-11-18 13:41:03,421 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Job job_local382131182_0007 completed successfully
2017-11-18 13:41:03,427 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1392)) - Counters: 33
File System Counters
FILE: Number of bytes read=16956
FILE: Number of bytes written=3637458
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=12
Map output records=12
Map output bytes=1872
Map output materialized bytes=480
Input split bytes=130
Combine input records=12
Combine output records=3
Reduce input groups=3
Reduce shuffle bytes=480
Reduce input records=3
Reduce output records=3
Spilled Records=6
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=0
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=1926234112
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=106
File Output Format Counters
Bytes Written=38
readRandomCenterFromInputFile读取一行:1.0 3.0
readRandomCenterFromInputFile读取一行:2.0 6.5
readRandomCenterFromInputFile读取一行:3.0 102.0
2017-11-18 13:41:03,478 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(71)) - Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
2017-11-18 13:41:03,482 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-11-18 13:41:03,483 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(281)) - Total input paths to process : 1
2017-11-18 13:41:03,508 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(199)) - number of splits:1
2017-11-18 13:41:03,520 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(288)) - Submitting tokens for job: job_local429797569_0008
2017-11-18 13:41:03,595 INFO [main] mapreduce.Job (Job.java:submit(1301)) - The url to track the job: http://localhost:8080/
2017-11-18 13:41:03,595 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1346)) - Running job: job_local429797569_0008
2017-11-18 13:41:03,600 INFO [Thread-209] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2017-11-18 13:41:03,600 INFO [Thread-209] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-11-18 13:41:03,605 INFO [Thread-209] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2017-11-18 13:41:03,605 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local429797569_0008_m_000000_0
2017-11-18 13:41:03,610 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:03,613 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(753)) - Processing split: file:/media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt:0+106
2017-11-18 13:41:03,706 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1202)) - (EQUATOR) 0 kvi 26214396(104857584)
2017-11-18 13:41:03,706 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(995)) - mapreduce.task.io.sort.mb: 100
2017-11-18 13:41:03,706 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(996)) - soft limit at 83886080
2017-11-18 13:41:03,707 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(997)) - bufstart = 0; bufvoid = 104857600
2017-11-18 13:41:03,707 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - kvstart = 26214396; length = 6553600
2017-11-18 13:41:03,708 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(402)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
----------setup------------
----------centers------------
[1.0, 3.0]
[2.0, 6.5]
[3.0, 102.0]
map value=1.0 2.0
valueVector=[1.0, 2.0]
map 生成:1.0 3.0,1.0 2.0
map value=1.0 3.0
valueVector=[1.0, 3.0]
map 生成:1.0 3.0,1.0 3.0
map value=1.0 4.0
valueVector=[1.0, 4.0]
map 生成:1.0 3.0,1.0 4.0
map value=2.0 5.0
valueVector=[2.0, 5.0]
map 生成:2.0 6.5,2.0 5.0
map value=2.0 6.0
valueVector=[2.0, 6.0]
map 生成:2.0 6.5,2.0 6.0
map value=2.0 7.0
valueVector=[2.0, 7.0]
map 生成:2.0 6.5,2.0 7.0
map value=2.0 8.0
valueVector=[2.0, 8.0]
map 生成:2.0 6.5,2.0 8.0
map value=3.0 100.0
valueVector=[3.0, 100.0]
map 生成:3.0 102.0,3.0 100.0
map value=3.0 101.0
valueVector=[3.0, 101.0]
map 生成:3.0 102.0,3.0 101.0
map value=3.0 102.0
valueVector=[3.0, 102.0]
map 生成:3.0 102.0,3.0 102.0
map value=3.0 103.0
valueVector=[3.0, 103.0]
map 生成:3.0 102.0,3.0 103.0
map value=3.0 104.0
valueVector=[3.0, 104.0]
map 生成:3.0 102.0,3.0 104.0
2017-11-18 13:41:03,711 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2017-11-18 13:41:03,711 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1457)) - Starting flush of map output
2017-11-18 13:41:03,711 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1475)) - Spilling map output
2017-11-18 13:41:03,711 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1476)) - bufstart = 0; bufend = 1872; bufvoid = 104857600
2017-11-18 13:41:03,711 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1478)) - kvstart = 26214396(104857584); kvend = 26214352(104857408); length = 45/6553600
----------------------KMeansCombiner---------------------
key=1.0 3.0
values:
value=1.0 4.0
value=1.0 3.0
value=1.0 2.0
sumWritableList=[1.0, 3.0]
----------------------KMeansCombiner---------------------
key=2.0 6.5
values:
value=2.0 8.0
value=2.0 7.0
value=2.0 6.0
value=2.0 5.0
sumWritableList=[2.0, 6.5]
----------------------KMeansCombiner---------------------
key=3.0 102.0
values:
value=3.0 104.0
value=3.0 103.0
value=3.0 102.0
value=3.0 101.0
value=3.0 100.0
sumWritableList=[3.0, 102.0]
2017-11-18 13:41:03,714 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1660)) - Finished spill 0
2017-11-18 13:41:03,716 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1001)) - Task:attempt_local429797569_0008_m_000000_0 is done. And is in the process of committing
2017-11-18 13:41:03,718 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2017-11-18 13:41:03,718 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local429797569_0008_m_000000_0' done.
2017-11-18 13:41:03,718 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local429797569_0008_m_000000_0
2017-11-18 13:41:03,719 INFO [Thread-209] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2017-11-18 13:41:03,720 INFO [Thread-209] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2017-11-18 13:41:03,720 INFO [pool-25-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local429797569_0008_r_000000_0
2017-11-18 13:41:03,721 INFO [pool-25-thread-1] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:03,721 INFO [pool-25-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@4e1feafc
2017-11-18 13:41:03,724 INFO [pool-25-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:(197)) - MergerManager: memoryLimit=1283037568, maxSingleShuffleLimit=320759392, mergeThreshold=846804800, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-11-18 13:41:03,724 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local429797569_0008_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-11-18 13:41:03,725 INFO [localfetcher#8] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(141)) - localfetcher#8 about to shuffle output of map attempt_local429797569_0008_m_000000_0 decomp: 476 len: 480 to MEMORY
2017-11-18 13:41:03,725 INFO [localfetcher#8] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 476 bytes from map-output for attempt_local429797569_0008_m_000000_0
2017-11-18 13:41:03,726 INFO [localfetcher#8] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(315)) - closeInMemoryFile -> map-output of size: 476, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->476
2017-11-18 13:41:03,726 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2017-11-18 13:41:03,726 INFO [pool-25-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:03,726 INFO [pool-25-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(687)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-11-18 13:41:03,728 INFO [pool-25-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:03,728 INFO [pool-25-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:03,728 INFO [pool-25-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(754)) - Merged 1 segments, 476 bytes to disk to satisfy reduce memory limit
2017-11-18 13:41:03,728 INFO [pool-25-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(784)) - Merging 1 files, 480 bytes from disk
2017-11-18 13:41:03,728 INFO [pool-25-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(799)) - Merging 0 segments, 0 bytes from memory into reduce
2017-11-18 13:41:03,729 INFO [pool-25-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:03,729 INFO [pool-25-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:03,729 INFO [pool-25-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
----------------------reduce---------------------
key=1.0 3.0
values:
1.0 3.0
reduce生成:1.0 3.0|1.0 3.0
----------------------reduce---------------------
key=2.0 6.5
values:
2.0 6.5
reduce生成:2.0 6.5|2.0 6.5
----------------------reduce---------------------
key=3.0 102.0
values:
3.0 102.0
reduce生成:3.0 102.0|3.0 102.0
2017-11-18 13:41:03,736 INFO [pool-25-thread-1] mapred.Task (Task.java:done(1001)) - Task:attempt_local429797569_0008_r_000000_0 is done. And is in the process of committing
2017-11-18 13:41:03,738 INFO [pool-25-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:03,738 INFO [pool-25-thread-1] mapred.Task (Task.java:commit(1162)) - Task attempt_local429797569_0008_r_000000_0 is allowed to commit now
2017-11-18 13:41:03,739 INFO [pool-25-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local429797569_0008_r_000000_0' to file:/media/chenjie/0009418200012FF3/ubuntu/kmeans/_temporary/0/task_local429797569_0008_r_000000
2017-11-18 13:41:03,740 INFO [pool-25-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2017-11-18 13:41:03,740 INFO [pool-25-thread-1] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local429797569_0008_r_000000_0' done.
2017-11-18 13:41:03,740 INFO [pool-25-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local429797569_0008_r_000000_0
2017-11-18 13:41:03,740 INFO [Thread-209] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2017-11-18 13:41:04,595 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - Job job_local429797569_0008 running in uber mode : false
2017-11-18 13:41:04,596 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1374)) - map 100% reduce 100%
2017-11-18 13:41:04,597 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Job job_local429797569_0008 completed successfully
2017-11-18 13:41:04,607 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1392)) - Counters: 33
File System Counters
FILE: Number of bytes read=19520
FILE: Number of bytes written=4155590
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=12
Map output records=12
Map output bytes=1872
Map output materialized bytes=480
Input split bytes=130
Combine input records=12
Combine output records=3
Reduce input groups=3
Reduce shuffle bytes=480
Reduce input records=3
Reduce output records=3
Spilled Records=6
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=0
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=2136997888
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=106
File Output Format Counters
Bytes Written=38
readRandomCenterFromInputFile读取一行:1.0 3.0
readRandomCenterFromInputFile读取一行:2.0 6.5
readRandomCenterFromInputFile读取一行:3.0 102.0
2017-11-18 13:41:04,641 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(71)) - Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
2017-11-18 13:41:04,648 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-11-18 13:41:04,649 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(281)) - Total input paths to process : 1
2017-11-18 13:41:04,660 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(199)) - number of splits:1
2017-11-18 13:41:04,684 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(288)) - Submitting tokens for job: job_local1992574236_0009
2017-11-18 13:41:04,741 INFO [main] mapreduce.Job (Job.java:submit(1301)) - The url to track the job: http://localhost:8080/
2017-11-18 13:41:04,742 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1346)) - Running job: job_local1992574236_0009
2017-11-18 13:41:04,742 INFO [Thread-236] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2017-11-18 13:41:04,742 INFO [Thread-236] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-11-18 13:41:04,744 INFO [Thread-236] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2017-11-18 13:41:04,744 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local1992574236_0009_m_000000_0
2017-11-18 13:41:04,745 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:04,745 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(753)) - Processing split: file:/media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt:0+106
2017-11-18 13:41:04,840 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1202)) - (EQUATOR) 0 kvi 26214396(104857584)
2017-11-18 13:41:04,840 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(995)) - mapreduce.task.io.sort.mb: 100
2017-11-18 13:41:04,840 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(996)) - soft limit at 83886080
2017-11-18 13:41:04,841 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(997)) - bufstart = 0; bufvoid = 104857600
2017-11-18 13:41:04,841 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - kvstart = 26214396; length = 6553600
2017-11-18 13:41:04,841 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(402)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
----------setup------------
----------centers------------
[1.0, 3.0]
[2.0, 6.5]
[3.0, 102.0]
map value=1.0 2.0
valueVector=[1.0, 2.0]
map 生成:1.0 3.0,1.0 2.0
map value=1.0 3.0
valueVector=[1.0, 3.0]
map 生成:1.0 3.0,1.0 3.0
map value=1.0 4.0
valueVector=[1.0, 4.0]
map 生成:1.0 3.0,1.0 4.0
map value=2.0 5.0
valueVector=[2.0, 5.0]
map 生成:2.0 6.5,2.0 5.0
map value=2.0 6.0
valueVector=[2.0, 6.0]
map 生成:2.0 6.5,2.0 6.0
map value=2.0 7.0
valueVector=[2.0, 7.0]
map 生成:2.0 6.5,2.0 7.0
map value=2.0 8.0
valueVector=[2.0, 8.0]
map 生成:2.0 6.5,2.0 8.0
map value=3.0 100.0
valueVector=[3.0, 100.0]
map 生成:3.0 102.0,3.0 100.0
map value=3.0 101.0
valueVector=[3.0, 101.0]
map 生成:3.0 102.0,3.0 101.0
map value=3.0 102.0
valueVector=[3.0, 102.0]
map 生成:3.0 102.0,3.0 102.0
map value=3.0 103.0
valueVector=[3.0, 103.0]
map 生成:3.0 102.0,3.0 103.0
map value=3.0 104.0
valueVector=[3.0, 104.0]
map 生成:3.0 102.0,3.0 104.0
2017-11-18 13:41:04,845 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2017-11-18 13:41:04,845 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1457)) - Starting flush of map output
2017-11-18 13:41:04,845 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1475)) - Spilling map output
2017-11-18 13:41:04,845 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1476)) - bufstart = 0; bufend = 1872; bufvoid = 104857600
2017-11-18 13:41:04,845 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1478)) - kvstart = 26214396(104857584); kvend = 26214352(104857408); length = 45/6553600
----------------------KMeansCombiner---------------------
key=1.0 3.0
values:
value=1.0 4.0
value=1.0 3.0
value=1.0 2.0
sumWritableList=[1.0, 3.0]
----------------------KMeansCombiner---------------------
key=2.0 6.5
values:
value=2.0 8.0
value=2.0 7.0
value=2.0 6.0
value=2.0 5.0
sumWritableList=[2.0, 6.5]
----------------------KMeansCombiner---------------------
key=3.0 102.0
values:
value=3.0 104.0
value=3.0 103.0
value=3.0 102.0
value=3.0 101.0
value=3.0 100.0
sumWritableList=[3.0, 102.0]
2017-11-18 13:41:04,848 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1660)) - Finished spill 0
2017-11-18 13:41:04,849 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1001)) - Task:attempt_local1992574236_0009_m_000000_0 is done. And is in the process of committing
2017-11-18 13:41:04,850 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2017-11-18 13:41:04,850 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local1992574236_0009_m_000000_0' done.
2017-11-18 13:41:04,850 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local1992574236_0009_m_000000_0
2017-11-18 13:41:04,851 INFO [Thread-236] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2017-11-18 13:41:04,851 INFO [Thread-236] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2017-11-18 13:41:04,851 INFO [pool-28-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local1992574236_0009_r_000000_0
2017-11-18 13:41:04,852 INFO [pool-28-thread-1] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:04,852 INFO [pool-28-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@2ff22909
2017-11-18 13:41:04,852 INFO [pool-28-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:(197)) - MergerManager: memoryLimit=1283037568, maxSingleShuffleLimit=320759392, mergeThreshold=846804800, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-11-18 13:41:04,853 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1992574236_0009_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-11-18 13:41:04,853 INFO [localfetcher#9] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(141)) - localfetcher#9 about to shuffle output of map attempt_local1992574236_0009_m_000000_0 decomp: 476 len: 480 to MEMORY
2017-11-18 13:41:04,854 INFO [localfetcher#9] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 476 bytes from map-output for attempt_local1992574236_0009_m_000000_0
2017-11-18 13:41:04,854 INFO [localfetcher#9] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(315)) - closeInMemoryFile -> map-output of size: 476, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->476
2017-11-18 13:41:04,854 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2017-11-18 13:41:04,855 INFO [pool-28-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:04,855 INFO [pool-28-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(687)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-11-18 13:41:04,856 INFO [pool-28-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:04,856 INFO [pool-28-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:04,856 INFO [pool-28-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(754)) - Merged 1 segments, 476 bytes to disk to satisfy reduce memory limit
2017-11-18 13:41:04,856 INFO [pool-28-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(784)) - Merging 1 files, 480 bytes from disk
2017-11-18 13:41:04,856 INFO [pool-28-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(799)) - Merging 0 segments, 0 bytes from memory into reduce
2017-11-18 13:41:04,856 INFO [pool-28-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:04,857 INFO [pool-28-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:04,857 INFO [pool-28-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
----------------------reduce---------------------
key=1.0 3.0
values:
1.0 3.0
reduce生成:1.0 3.0|1.0 3.0
----------------------reduce---------------------
key=2.0 6.5
values:
2.0 6.5
reduce生成:2.0 6.5|2.0 6.5
----------------------reduce---------------------
key=3.0 102.0
values:
3.0 102.0
reduce生成:3.0 102.0|3.0 102.0
2017-11-18 13:41:04,864 INFO [pool-28-thread-1] mapred.Task (Task.java:done(1001)) - Task:attempt_local1992574236_0009_r_000000_0 is done. And is in the process of committing
2017-11-18 13:41:04,865 INFO [pool-28-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:04,865 INFO [pool-28-thread-1] mapred.Task (Task.java:commit(1162)) - Task attempt_local1992574236_0009_r_000000_0 is allowed to commit now
2017-11-18 13:41:04,866 INFO [pool-28-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local1992574236_0009_r_000000_0' to file:/media/chenjie/0009418200012FF3/ubuntu/kmeans/_temporary/0/task_local1992574236_0009_r_000000
2017-11-18 13:41:04,867 INFO [pool-28-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2017-11-18 13:41:04,867 INFO [pool-28-thread-1] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local1992574236_0009_r_000000_0' done.
2017-11-18 13:41:04,867 INFO [pool-28-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local1992574236_0009_r_000000_0
2017-11-18 13:41:04,867 INFO [Thread-236] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2017-11-18 13:41:05,742 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - Job job_local1992574236_0009 running in uber mode : false
2017-11-18 13:41:05,743 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1374)) - map 100% reduce 100%
2017-11-18 13:41:05,743 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Job job_local1992574236_0009 completed successfully
2017-11-18 13:41:05,744 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1392)) - Counters: 33
File System Counters
FILE: Number of bytes read=22084
FILE: Number of bytes written=4676482
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=12
Map output records=12
Map output bytes=1872
Map output materialized bytes=480
Input split bytes=130
Combine input records=12
Combine output records=3
Reduce input groups=3
Reduce shuffle bytes=480
Reduce input records=3
Reduce output records=3
Spilled Records=6
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=0
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=2326790144
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=106
File Output Format Counters
Bytes Written=38
readRandomCenterFromInputFile读取一行:1.0 3.0
readRandomCenterFromInputFile读取一行:2.0 6.5
readRandomCenterFromInputFile读取一行:3.0 102.0
2017-11-18 13:41:05,768 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(71)) - Cannot initialize JVM Metrics with processName=JobTracker, sessionId= - already initialized
2017-11-18 13:41:05,774 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2017-11-18 13:41:05,775 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(281)) - Total input paths to process : 1
2017-11-18 13:41:05,800 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(199)) - number of splits:1
2017-11-18 13:41:05,810 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(288)) - Submitting tokens for job: job_local1118063138_0010
2017-11-18 13:41:05,863 INFO [main] mapreduce.Job (Job.java:submit(1301)) - The url to track the job: http://localhost:8080/
2017-11-18 13:41:05,863 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1346)) - Running job: job_local1118063138_0010
2017-11-18 13:41:05,863 INFO [Thread-263] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2017-11-18 13:41:05,867 INFO [Thread-263] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2017-11-18 13:41:05,869 INFO [Thread-263] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2017-11-18 13:41:05,869 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local1118063138_0010_m_000000_0
2017-11-18 13:41:05,870 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:05,870 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(753)) - Processing split: file:/media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt:0+106
2017-11-18 13:41:05,956 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1202)) - (EQUATOR) 0 kvi 26214396(104857584)
2017-11-18 13:41:05,957 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(995)) - mapreduce.task.io.sort.mb: 100
2017-11-18 13:41:05,957 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(996)) - soft limit at 83886080
2017-11-18 13:41:05,957 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(997)) - bufstart = 0; bufvoid = 104857600
2017-11-18 13:41:05,957 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - kvstart = 26214396; length = 6553600
2017-11-18 13:41:05,958 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(402)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
----------setup------------
----------centers------------
[1.0, 3.0]
[2.0, 6.5]
[3.0, 102.0]
map value=1.0 2.0
valueVector=[1.0, 2.0]
map 生成:1.0 3.0,1.0 2.0
map value=1.0 3.0
valueVector=[1.0, 3.0]
map 生成:1.0 3.0,1.0 3.0
map value=1.0 4.0
valueVector=[1.0, 4.0]
map 生成:1.0 3.0,1.0 4.0
map value=2.0 5.0
valueVector=[2.0, 5.0]
map 生成:2.0 6.5,2.0 5.0
map value=2.0 6.0
valueVector=[2.0, 6.0]
map 生成:2.0 6.5,2.0 6.0
map value=2.0 7.0
valueVector=[2.0, 7.0]
map 生成:2.0 6.5,2.0 7.0
map value=2.0 8.0
valueVector=[2.0, 8.0]
map 生成:2.0 6.5,2.0 8.0
map value=3.0 100.0
valueVector=[3.0, 100.0]
map 生成:3.0 102.0,3.0 100.0
map value=3.0 101.0
valueVector=[3.0, 101.0]
map 生成:3.0 102.0,3.0 101.0
map value=3.0 102.0
valueVector=[3.0, 102.0]
map 生成:3.0 102.0,3.0 102.0
map value=3.0 103.0
valueVector=[3.0, 103.0]
map 生成:3.0 102.0,3.0 103.0
map value=3.0 104.0
valueVector=[3.0, 104.0]
map 生成:3.0 102.0,3.0 104.0
2017-11-18 13:41:05,960 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2017-11-18 13:41:05,961 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1457)) - Starting flush of map output
2017-11-18 13:41:05,961 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1475)) - Spilling map output
2017-11-18 13:41:05,961 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1476)) - bufstart = 0; bufend = 1872; bufvoid = 104857600
2017-11-18 13:41:05,961 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1478)) - kvstart = 26214396(104857584); kvend = 26214352(104857408); length = 45/6553600
----------------------KMeansCombiner---------------------
key=1.0 3.0
values:
value=1.0 4.0
value=1.0 3.0
value=1.0 2.0
sumWritableList=[1.0, 3.0]
----------------------KMeansCombiner---------------------
key=2.0 6.5
values:
value=2.0 8.0
value=2.0 7.0
value=2.0 6.0
value=2.0 5.0
sumWritableList=[2.0, 6.5]
----------------------KMeansCombiner---------------------
key=3.0 102.0
values:
value=3.0 104.0
value=3.0 103.0
value=3.0 102.0
value=3.0 101.0
value=3.0 100.0
sumWritableList=[3.0, 102.0]
2017-11-18 13:41:05,964 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1660)) - Finished spill 0
2017-11-18 13:41:05,966 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1001)) - Task:attempt_local1118063138_0010_m_000000_0 is done. And is in the process of committing
2017-11-18 13:41:05,968 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2017-11-18 13:41:05,968 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local1118063138_0010_m_000000_0' done.
2017-11-18 13:41:05,968 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local1118063138_0010_m_000000_0
2017-11-18 13:41:05,968 INFO [Thread-263] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2017-11-18 13:41:05,969 INFO [Thread-263] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2017-11-18 13:41:05,969 INFO [pool-31-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local1118063138_0010_r_000000_0
2017-11-18 13:41:05,970 INFO [pool-31-thread-1] mapred.Task (Task.java:initialize(587)) - Using ResourceCalculatorProcessTree : [ ]
2017-11-18 13:41:05,970 INFO [pool-31-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@38e03c4d
2017-11-18 13:41:05,972 INFO [pool-31-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:(197)) - MergerManager: memoryLimit=1283037568, maxSingleShuffleLimit=320759392, mergeThreshold=846804800, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2017-11-18 13:41:05,976 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1118063138_0010_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2017-11-18 13:41:05,977 INFO [localfetcher#10] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(141)) - localfetcher#10 about to shuffle output of map attempt_local1118063138_0010_m_000000_0 decomp: 476 len: 480 to MEMORY
2017-11-18 13:41:05,977 INFO [localfetcher#10] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 476 bytes from map-output for attempt_local1118063138_0010_m_000000_0
2017-11-18 13:41:05,977 INFO [localfetcher#10] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(315)) - closeInMemoryFile -> map-output of size: 476, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->476
2017-11-18 13:41:05,978 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2017-11-18 13:41:05,978 INFO [pool-31-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:05,978 INFO [pool-31-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(687)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2017-11-18 13:41:05,979 INFO [pool-31-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:05,979 INFO [pool-31-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:05,980 INFO [pool-31-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(754)) - Merged 1 segments, 476 bytes to disk to satisfy reduce memory limit
2017-11-18 13:41:05,980 INFO [pool-31-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(784)) - Merging 1 files, 480 bytes from disk
2017-11-18 13:41:05,980 INFO [pool-31-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(799)) - Merging 0 segments, 0 bytes from memory into reduce
2017-11-18 13:41:05,980 INFO [pool-31-thread-1] mapred.Merger (Merger.java:merge(597)) - Merging 1 sorted segments
2017-11-18 13:41:05,981 INFO [pool-31-thread-1] mapred.Merger (Merger.java:merge(696)) - Down to the last merge-pass, with 1 segments left of total size: 396 bytes
2017-11-18 13:41:05,981 INFO [pool-31-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
----------------------reduce---------------------
key=1.0 3.0
values:
1.0 3.0
reduce生成:1.0 3.0|1.0 3.0
----------------------reduce---------------------
key=2.0 6.5
values:
2.0 6.5
reduce生成:2.0 6.5|2.0 6.5
----------------------reduce---------------------
key=3.0 102.0
values:
3.0 102.0
reduce生成:3.0 102.0|3.0 102.0
2017-11-18 13:41:05,988 INFO [pool-31-thread-1] mapred.Task (Task.java:done(1001)) - Task:attempt_local1118063138_0010_r_000000_0 is done. And is in the process of committing
2017-11-18 13:41:05,989 INFO [pool-31-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2017-11-18 13:41:05,989 INFO [pool-31-thread-1] mapred.Task (Task.java:commit(1162)) - Task attempt_local1118063138_0010_r_000000_0 is allowed to commit now
2017-11-18 13:41:05,990 INFO [pool-31-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(439)) - Saved output of task 'attempt_local1118063138_0010_r_000000_0' to file:/media/chenjie/0009418200012FF3/ubuntu/kmeans/_temporary/0/task_local1118063138_0010_r_000000
2017-11-18 13:41:05,991 INFO [pool-31-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2017-11-18 13:41:05,991 INFO [pool-31-thread-1] mapred.Task (Task.java:sendDone(1121)) - Task 'attempt_local1118063138_0010_r_000000_0' done.
2017-11-18 13:41:05,991 INFO [pool-31-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local1118063138_0010_r_000000_0
2017-11-18 13:41:05,991 INFO [Thread-263] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2017-11-18 13:41:06,864 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - Job job_local1118063138_0010 running in uber mode : false
2017-11-18 13:41:06,864 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1374)) - map 100% reduce 100%
2017-11-18 13:41:06,865 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Job job_local1118063138_0010 completed successfully
2017-11-18 13:41:06,868 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1392)) - Counters: 33
File System Counters
FILE: Number of bytes read=24648
FILE: Number of bytes written=5197374
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=12
Map output records=12
Map output bytes=1872
Map output materialized bytes=480
Input split bytes=130
Combine input records=12
Combine output records=3
Reduce input groups=3
Reduce shuffle bytes=480
Reduce input records=3
Reduce output records=3
Spilled Records=6
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=0
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=2537553920
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=106
File Output Format Counters
Bytes Written=38
readRandomCenterFromInputFile读取一行:1.0 3.0
readRandomCenterFromInputFile读取一行:2.0 6.5
readRandomCenterFromInputFile读取一行:3.0 102.0
----------------------------------------KMeans聚类结果--------------------------------------
[1.0, 3.0]
[2.0, 6.5]
[3.0, 102.0]
Spark中:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
object ScalaKMeans {
  /**
   * Runs KMeans clustering with Spark MLlib over a whitespace-separated
   * file of numeric vectors, then prints the cluster centers and the
   * model's cost (sum of squared distances to the nearest center).
   *
   * Optional third CLI argument (args(2)): the number of runs passed to
   * KMeans.train (defaults to 1).
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("KMeans").setMaster("local")
    val sc = new SparkContext(sparkConf)
    val input = "file:///media/chenjie/0009418200012FF3/ubuntu/kmeans_input_file.txt"
    val k = 3
    val iterations = 10
    // BUG FIX: the original read args(3) under the guard args.length >= 3,
    // which throws ArrayIndexOutOfBoundsException when exactly three args
    // are supplied (valid indices are 0..length-1). The third argument is
    // args(2).
    val runs = if (args.length >= 3) args(2).toInt else 1
    val lines = sc.textFile(input)
    // Parse each line into a dense vector of doubles (whitespace-separated).
    val points = lines.map(line => {
      val tokens = line.split("\\s+")
      Vectors.dense(tokens.map(_.toDouble))
    })
    // Train the model; the 5-arg overload (with `runs`) is for spark-2.0.2.
    val model = KMeans.train(points, k, iterations, runs, KMeans.K_MEANS_PARALLEL) // spark-2.0.2
    // model = KMeans.train(points, k, iterations, KMeans.K_MEANS_PARALLEL) // spark-2.1.0
    println("Cluster centers:")
    model.clusterCenters.foreach(println)
    // Report clustering quality: lower cost means tighter clusters.
    val cost = model.computeCost(points)
    println(s"Cost: ${cost}")
    // done!
    sc.stop()
  }
}