package org.dataalgorithms.chap01.mapreduce;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * The DateTemperaturePair class enables us to represent a
 * composite type of (yearMonth, day, temperature). To persist
 * a composite type (in fact, any custom type) in Hadoop, it has
 * to implement the org.apache.hadoop.io.Writable interface.
 *
 * To be comparable (sortable) in Hadoop, it also has to implement
 * the org.apache.hadoop.io.WritableComparable interface.
 *
 * @author Mahmoud Parsian
 */
public class DateTemperaturePair implements WritableComparable<DateTemperaturePair> {
    // A custom data type must implement the Writable interface to be
    // persisted by Hadoop; to support sorting it must also implement
    // WritableComparable (which itself extends Writable).
    private final Text yearMonth = new Text();
    private final Text day = new Text();
    private final IntWritable temperature = new IntWritable();

    public DateTemperaturePair() {
    }

    public DateTemperaturePair(String yearMonth, String day, int temperature) {
        this.yearMonth.set(yearMonth);
        this.day.set(day);
        this.temperature.set(temperature);
    }

    public static DateTemperaturePair read(DataInput in) throws IOException {
        DateTemperaturePair pair = new DateTemperaturePair();
        pair.readFields(in);
        return pair;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        yearMonth.write(out);
        day.write(out);
        temperature.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        yearMonth.readFields(in);
        day.readFields(in);
        temperature.readFields(in);
    }

    @Override
    public int compareTo(DateTemperaturePair pair) {
        // sort by yearMonth first
        int compareValue = this.yearMonth.compareTo(pair.getYearMonth());
        if (compareValue == 0) {
            // then sort by temperature
            compareValue = temperature.compareTo(pair.getTemperature());
        }
        //return compareValue; // to sort ascending
        return -1 * compareValue; // to sort descending
    }

    public Text getYearMonthDay() {
        return new Text(yearMonth.toString() + day.toString());
    }

    public Text getYearMonth() {
        return yearMonth;
    }

    public Text getDay() {
        return day;
    }

    public IntWritable getTemperature() {
        return temperature;
    }

    public void setYearMonth(String yearMonthAsString) {
        yearMonth.set(yearMonthAsString);
    }

    public void setDay(String dayAsString) {
        day.set(dayAsString);
    }

    public void setTemperature(int temp) {
        temperature.set(temp);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        // two pairs are equal when both yearMonth and temperature are equal;
        // day is deliberately ignored, consistent with hashCode()
        DateTemperaturePair that = (DateTemperaturePair) o;
        if (!temperature.equals(that.temperature)) {
            return false;
        }
        if (!yearMonth.equals(that.yearMonth)) {
            return false;
        }
        return true;
    }

    @Override
    public int hashCode() {
        // the fields are final and never null, so no null checks are needed
        int result = yearMonth.hashCode();
        result = 31 * result + temperature.hashCode();
        return result;
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append("DateTemperaturePair{yearMonth=");
        builder.append(yearMonth);
        builder.append(", day=");
        builder.append(day);
        builder.append(", temperature=");
        builder.append(temperature);
        builder.append("}");
        return builder.toString();
    }
}
Each input line carries (year, month), day, and temperature, so the class defines three fields: yearMonth, day, and temperature.
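The mapper and reducer classes are not listed here. As a rough sketch of the mapper (the class name SecondarySortMapper and the parsing details are assumptions inferred from the comma-separated input shown in the run below, e.g. 2019,p,4,40), it builds a DateTemperaturePair composite key from each line and emits the temperature as the value:

package org.dataalgorithms.chap01.mapreduce;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// a sketch, not the book's exact mapper
public class SecondarySortMapper
        extends Mapper<LongWritable, Text, DateTemperaturePair, Text> {

    private final DateTemperaturePair pair = new DateTemperaturePair();
    private final Text theTemperature = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // expected line format: year,month,day,temperature
        String[] tokens = value.toString().trim().split(",");
        pair.setYearMonth(tokens[0] + tokens[1]);          // natural key
        pair.setDay(tokens[2]);
        pair.setTemperature(Integer.parseInt(tokens[3]));  // secondary sort field
        theTemperature.set(tokens[3]);
        context.write(pair, theTemperature);
    }
}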
Custom partitioner. The partitioner uses the mapper's output key to decide which reducer each mapped record is sent to.
package org.dataalgorithms.chap01.mapreduce;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * The DateTemperaturePartitioner is a custom partitioner class,
 * which partitions data by the natural key only (using the yearMonth).
 * Without a custom partitioner, Hadoop will partition your mapped data
 * based on a hash code of the whole composite key.
 *
 * In Hadoop, the partitioning phase takes place after the map() phase
 * and before the reduce() phase.
 *
 * @author Mahmoud Parsian
 */
public class DateTemperaturePartitioner
        extends Partitioner<DateTemperaturePair, Text> {

    @Override
    public int getPartition(DateTemperaturePair pair, Text text, int numberOfPartitions) {
        // partition by the hash of the natural key (yearMonth) only,
        // making sure the partition number is non-negative
        return Math.abs(pair.getYearMonth().hashCode() % numberOfPartitions);
    }
}
Custom grouping comparator. The comparator controls which keys are grouped together into a single call to reducer.reduce().
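It compares only the natural key (yearMonth), so that all pairs for the same year-month are fed to one reduce() call. A sketch following the standard WritableComparator pattern (the class name DateTemperatureGroupingComparator is an assumption, matching the naming convention above):

package org.dataalgorithms.chap01.mapreduce;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Groups composite keys by the natural key (yearMonth) only, so all
 * temperatures of a given year-month arrive in one reduce() call.
 */
public class DateTemperatureGroupingComparator extends WritableComparator {

    public DateTemperatureGroupingComparator() {
        // true => create instances of DateTemperaturePair for comparison
        super(DateTemperaturePair.class, true);
    }

    @Override
    public int compare(WritableComparable wc1, WritableComparable wc2) {
        DateTemperaturePair pair1 = (DateTemperaturePair) wc1;
        DateTemperaturePair pair2 = (DateTemperaturePair) wc2;
        return pair1.getYearMonth().compareTo(pair2.getYearMonth());
    }
}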
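For these classes to take effect, the driver has to register them on the Job. The SecondarySortDriver invoked in the run below is not reproduced here, so the following is only a sketch of the calls such a driver would make (SecondarySortMapper is the mapper sketched above; SecondarySortReducer is sketched after the job output below):

// inside the driver's run() method -- a sketch, not the actual SecondarySortDriver
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "SecondarySort");
job.setJarByClass(SecondarySortDriver.class);

job.setMapperClass(SecondarySortMapper.class);
job.setReducerClass(SecondarySortReducer.class);

// composite key / value types emitted by the mapper
job.setMapOutputKeyClass(DateTemperaturePair.class);
job.setMapOutputValueClass(Text.class);

// plug in the custom partitioner and grouping comparator
job.setPartitionerClass(DateTemperaturePartitioner.class);
job.setGroupingComparatorClass(DateTemperatureGroupingComparator.class);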
[root@master bin]# ./hadoop jar /root/Data/data_algorithms/chapter1/hadoop_spark-1.0-SNAPSHOT.jar org.dataalgorithms.chap01.mapreduce.SecondarySortDriver /data_algorithms/chapter1/input /data_algorithms/chapter1/output
19/04/14 16:27:04 INFO client.RMProxy: Connecting to ResourceManager at master/172.16.21.220:8032
19/04/14 16:27:05 INFO input.FileInputFormat: Total input paths to process : 1
19/04/14 16:27:06 INFO mapreduce.JobSubmitter: number of splits:1
19/04/14 16:27:06 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1555221334170_0003
19/04/14 16:27:07 INFO impl.YarnClientImpl: Submitted application application_1555221334170_0003
19/04/14 16:27:07 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1555221334170_0003/
19/04/14 16:27:07 INFO mapreduce.Job: Running job: job_1555221334170_0003
19/04/14 16:27:17 INFO mapreduce.Job: Job job_1555221334170_0003 running in uber mode : false
19/04/14 16:27:17 INFO mapreduce.Job: map 0% reduce 0%
19/04/14 16:27:24 INFO mapreduce.Job: map 100% reduce 0%
19/04/14 16:27:32 INFO mapreduce.Job: map 100% reduce 100%
19/04/14 16:27:33 INFO mapreduce.Job: Job job_1555221334170_0003 completed successfully
19/04/14 16:27:33 INFO mapreduce.Job: Counters: 49
	File System Counters
		FILE: Number of bytes read=234
		FILE: Number of bytes written=238407
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=289
		HDFS: Number of bytes written=334
		HDFS: Number of read operations=6
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=2
	Job Counters
		Launched map tasks=1
		Launched reduce tasks=1
		Data-local map tasks=1
		Total time spent by all maps in occupied slots (ms)=4958
		Total time spent by all reduces in occupied slots (ms)=5274
		Total time spent by all map tasks (ms)=4958
		Total time spent by all reduce tasks (ms)=5274
		Total vcore-milliseconds taken by all map tasks=4958
		Total vcore-milliseconds taken by all reduce tasks=5274
		Total megabyte-milliseconds taken by all map tasks=5076992
		Total megabyte-milliseconds taken by all reduce tasks=5400576
	Map-Reduce Framework
		Map input records=14
		Map output records=14
		Map output bytes=200
		Map output materialized bytes=234
		Input split bytes=131
		Combine input records=0
		Combine output records=0
		Reduce input groups=5
		Reduce shuffle bytes=234
		Reduce input records=14
		Reduce output records=5
		Spilled Records=28
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=151
		CPU time spent (ms)=1920
		Physical memory (bytes) snapshot=309817344
		Virtual memory (bytes) snapshot=4159598592
		Total committed heap usage (bytes)=165810176
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters
		Bytes Read=158
	File Output Format Counters
		Bytes Written=334
19/04/14 16:27:33 INFO mapreduce.SecondarySortDriver: run(): status=true
19/04/14 16:27:33 INFO mapreduce.SecondarySortDriver: returnStatus=0
[root@master bin]# ./hadoop fs -ls /data_algorithms/chapter1/output/
Found 2 items
-rw-r--r-- 3 root supergroup 0 2019-04-14 16:27 /data_algorithms/chapter1/output/_SUCCESS
-rw-r--r-- 3 root supergroup 334 2019-04-14 16:27 /data_algorithms/chapter1/output/part-r-00000
[root@master bin]# ./hadoop fs -cat /data_algorithms/chapter1/output/p*
DateTemperaturePair{yearMonth=2019z, day=4, temperature=0} 8,7,4,0,
DateTemperaturePair{yearMonth=2019y, day=3, temperature=1} 7,5,1,
DateTemperaturePair{yearMonth=2019x, day=1, temperature=3} 9,6,3,
DateTemperaturePair{yearMonth=2019r, day=3, temperature=60} 60,
DateTemperaturePair{yearMonth=2019p, day=1, temperature=10} 40,20,10,
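Note how the composite keys appear in descending yearMonth order and each group's temperatures are already sorted descending: the shuffle sorted on the whole composite key, and the grouping comparator collapsed the 14 input records into the 5 groups reported by the counters. A reducer producing output of this shape looks roughly like the following (the class name SecondarySortReducer and the exact formatting are assumptions inferred from the output above):

package org.dataalgorithms.chap01.mapreduce;

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// a sketch, not the book's exact reducer
public class SecondarySortReducer
        extends Reducer<DateTemperaturePair, Text, DateTemperaturePair, Text> {

    @Override
    protected void reduce(DateTemperaturePair key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // all temperatures for one yearMonth arrive in a single call,
        // already sorted descending by the shuffle
        StringBuilder sortedTemperatures = new StringBuilder();
        for (Text temperature : values) {
            sortedTemperatures.append(temperature);
            sortedTemperatures.append(",");
        }
        // Hadoop reuses the key object while iterating, so at this point it
        // holds the last pair of the group -- which is why each printed key
        // shows the group's smallest temperature
        context.write(key, new Text(sortedTemperatures.toString()));
    }
}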
Rewriting the command above as a run.sh script:
[root@master chapter1]# ./run.sh
rmr: DEPRECATED: Please use 'rm -r' instead.
rmr: `/data_algorithms/chapter1/output': No such file or directory
2019,p,4,40
2019,p,6,20
2019,x,2,9
2019,y,2,5
2019,x,1,3
2019,y,1,7
2019,y,3,1
2019,x,3,6
2019,z,1,4
2019,z,2,8
2019,z,3,7
2019,z,4,0
2019,p,1,10
2019,r,3,60
./run.sh: line 7: org.dataalgorithms.chap01.mapreduce.SecondarySortDriver: command not found
Exception in thread "main" java.lang.ClassNotFoundException: /data_algorithms/chapter1/input
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
[root@master chapter1]# cat run.sh
# run.sh
export APP_JAR=/root/Data/data_algorithms/chapter1/hadoop_spark-1.0-SNAPSHOT.jar
INPUT=/data_algorithms/chapter1/input
OUTPUT=/data_algorithms/chapter1/output
$HADOOP_HOME/bin/hadoop fs -rmr $OUTPUT
$HADOOP_HOME/bin/hadoop fs -cat $INPUT/sam*
PROG=package org.dataalgorithms.chap01.mapreduce.SecondarySortDriver
$HADOOP_HOME/bin/hadoop jar $APP_JAR $PROG $INPUT $OUTPUT
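Both errors above come from line 7 of the script. Bash parses "PROG=package org.dataalgorithms.chap01.mapreduce.SecondarySortDriver" as the temporary assignment PROG=package followed by an attempt to execute org.dataalgorithms.chap01.mapreduce.SecondarySortDriver as a command, hence the "command not found". Because that assignment applied only to the failed command, $PROG is empty on the next line, so hadoop jar takes $INPUT as the main-class argument, hence the ClassNotFoundException: /data_algorithms/chapter1/input. Dropping the stray "package" keyword fixes both; a corrected version (also replacing the deprecated -rmr):

#!/bin/bash
# run.sh (corrected)
export APP_JAR=/root/Data/data_algorithms/chapter1/hadoop_spark-1.0-SNAPSHOT.jar
INPUT=/data_algorithms/chapter1/input
OUTPUT=/data_algorithms/chapter1/output
$HADOOP_HOME/bin/hadoop fs -rm -r $OUTPUT
$HADOOP_HOME/bin/hadoop fs -cat $INPUT/sam*
PROG=org.dataalgorithms.chap01.mapreduce.SecondarySortDriver
$HADOOP_HOME/bin/hadoop jar $APP_JAR $PROG $INPUT $OUTPUT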