百度网盘hama(0.7.1)
提取码:a765
hama安装参考: 林子雨大数据课程Hama单机环境安装配置
需要将hama安装目录下的bin目录添加到环境变量PATH中。
执行过程中发现: 向classpath添加某个目录下的所有jar文件时,需要把每个jar文件名逐一写上,只写目录名不行。(原因: classpath中的目录项只会加载该目录下的.class文件,不会自动加载其中的jar包;也可以使用通配符写法"目录/*"来包含该目录下的全部jar文件。)
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSP;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.bsp.BSPJobClient;
import org.apache.hama.bsp.BSPPeer;
import org.apache.hama.bsp.ClusterStatus;
import org.apache.hama.bsp.FileOutputFormat;
import org.apache.hama.bsp.NullInputFormat;
import org.apache.hama.bsp.TextOutputFormat;
import org.apache.hama.bsp.sync.SyncException;
/**
 * Monte Carlo estimation of PI implemented as a Hama BSP job.
 *
 * Every BSP task samples random points in the square [-1, 1) x [-1, 1),
 * converts the fraction that lands inside the unit circle into a local
 * estimate of PI and sends it to one task chosen as master. The master
 * averages the estimates it received and writes the result as job output.
 */
public class PiEstimator
{
    /** Job output directory; the timestamp keeps separate runs apart. */
    private static final Path TMP_OUTPUT = new Path("/tmp/pi-"
            + System.currentTimeMillis());

    /**
     * BSP task that produces one local estimate and, on the master task,
     * combines all received estimates.
     */
    public static class MyEstimator
            extends
            BSP<NullWritable, NullWritable, Text, DoubleWritable, DoubleWritable>
    {
        public static final Log LOG = LogFactory.getLog(MyEstimator.class);
        /** Name of the peer that collects the estimates; set in setup(). */
        private String masterTask;
        /** Number of random points sampled by each task. */
        private static final int iterations = 10000;

        /**
         * Samples {@code iterations} random points, sends the resulting local
         * estimate to the master task and synchronizes with the other peers.
         *
         * @param peer the BSP peer this task runs on
         * @throws IOException if sending the message fails
         * @throws SyncException if the barrier synchronization fails
         * @throws InterruptedException if the task is interrupted while syncing
         */
        @Override
        public void bsp(
                BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, DoubleWritable> peer)
                throws IOException, SyncException, InterruptedException
        {
            int in = 0;
            for (int i = 0; i < iterations; i++)
            {
                double x = 2.0 * Math.random() - 1.0;
                double y = 2.0 * Math.random() - 1.0;
                // Comparing the squared distance against 1.0 gives the same
                // outcome as comparing the distance itself, without the sqrt.
                if (x * x + y * y < 1.0)
                {
                    in++;
                }
            }
            // Area ratio circle/square is PI/4, hence the factor 4.
            double data = 4.0 * in / iterations;
            peer.send(masterTask, new DoubleWritable(data));
            peer.sync();
        }

        /**
         * Picks the peer in the middle of the peer list as master.
         *
         * @param peer the BSP peer this task runs on
         * @throws IOException declared by the overridden method
         */
        @Override
        public void setup(
                BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, DoubleWritable> peer)
                throws IOException
        {
            // Choose one as a master
            this.masterTask = peer.getPeerName(peer.getNumPeers() / 2);
        }

        /**
         * On the master task only: averages all received estimates and writes
         * the result as a single output record. Other tasks do nothing.
         *
         * @param peer the BSP peer this task runs on
         * @throws IOException if reading messages or writing the output fails
         */
        @Override
        public void cleanup(
                BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, DoubleWritable> peer)
                throws IOException
        {
            if (peer.getPeerName().equals(masterTask))
            {
                double pi = 0.0;
                // Must be read before draining the queue in the loop below.
                int numEstimates = peer.getNumCurrentMessages();
                DoubleWritable received;
                while ((received = peer.getCurrentMessage()) != null)
                {
                    pi += received.get();
                }
                pi = pi / numEstimates;
                peer.write(new Text("Estimated value of PI is"),
                        new DoubleWritable(pi));
            }
        }
    }

    /**
     * Copies the first non-empty file found in the job output directory to
     * standard output and then deletes the directory.
     *
     * The input stream is closed and the directory is deleted even when
     * listing or copying fails, so a failed read leaves nothing behind.
     *
     * @param conf configuration used to obtain the file system
     * @throws IOException if listing, reading or deleting fails
     */
    static void printOutput(HamaConfiguration conf) throws IOException
    {
        FileSystem fs = FileSystem.get(conf);
        try
        {
            FileStatus[] files = fs.listStatus(TMP_OUTPUT);
            for (int i = 0; i < files.length; i++)
            {
                if (files[i].getLen() > 0)
                {
                    FSDataInputStream in = fs.open(files[i].getPath());
                    try
                    {
                        // 'false': do not let copyBytes close System.out.
                        IOUtils.copyBytes(in, System.out, conf, false);
                    }
                    finally
                    {
                        in.close();
                    }
                    break;
                }
            }
        }
        finally
        {
            fs.delete(TMP_OUTPUT, true);
        }
    }

    /**
     * Configures and runs the job, then prints its output and the elapsed
     * wall-clock time if the job completed successfully.
     *
     * @param args optional; {@code args[0]} is the number of BSP tasks. When
     *             absent, the maximum reported by the cluster status is used.
     * @throws InterruptedException if waiting for the job is interrupted
     * @throws IOException if job submission or output handling fails
     * @throws ClassNotFoundException declared by the job submission call
     */
    public static void main(String[] args) throws InterruptedException,
            IOException, ClassNotFoundException
    {
        // BSP job configuration
        HamaConfiguration conf = new HamaConfiguration();
        BSPJob bsp = new BSPJob(conf, PiEstimator.class);
        // Set the job name
        bsp.setJobName("Pi Estimation Example");
        bsp.setBspClass(MyEstimator.class);
        bsp.setInputFormat(NullInputFormat.class);
        bsp.setOutputKeyClass(Text.class);
        bsp.setOutputValueClass(DoubleWritable.class);
        bsp.setOutputFormat(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(bsp, TMP_OUTPUT);
        BSPJobClient jobClient = new BSPJobClient(conf);
        ClusterStatus cluster = jobClient.getClusterStatus(true);
        if (args.length > 0)
        {
            bsp.setNumBspTask(Integer.parseInt(args[0]));
        }
        else
        {
            // Set to maximum
            bsp.setNumBspTask(cluster.getMaxTasks());
        }
        long startTime = System.currentTimeMillis();
        if (bsp.waitForCompletion(true))
        {
            printOutput(conf);
            System.out.println("Job Finished in "
                    + (System.currentTimeMillis() - startTime) / 1000.0
                    + " seconds");
        }
    }
}
#!/bin/bash
# Compiles PiEstimator.java against the Hama jars, packages the classes into
# PiEstimator.jar and runs the job with `hama jar`.
#
# Written to be safe when sourced into an interactive shell: it never calls
# `exit` and does not enable `set -e`.

# Rebuild the class path from scratch so that sourcing the script repeatedly
# does not keep appending the same jars. "." keeps the current directory on
# the class path, as the original leading empty entry did.
classpath=.
for jar in /usr/local/hama/lib/*.jar /usr/local/hama/*.jar
do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$jar" ] || continue
    classpath=$classpath:$jar
done
export classpath

# Each step runs only if the previous one succeeded.
javac -classpath "$classpath" PiEstimator.java &&
    jar -cvf PiEstimator.jar *.class &&
    hama jar PiEstimator.jar PiEstimator
source runhama.sh
added manifest
adding: PiEstimator.class(in = 3422) (out= 1705)(deflated 50%)
adding: PiEstimator$MyEstimator.class(in = 2441) (out= 1178)(deflated 51%)
19/10/23 22:07:17 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/10/23 22:07:17 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name
19/10/23 22:07:17 WARN conf.Configuration: org.apache.hadoop.fs.ChecksumFileSystem$FSDataBoundedInputStream@48fa0f47:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
19/10/23 22:07:17 WARN conf.Configuration: org.apache.hadoop.fs.ChecksumFileSystem$FSDataBoundedInputStream@48fa0f47:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
19/10/23 22:07:17 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name
19/10/23 22:07:18 INFO bsp.LocalBSPRunner: Setting up a new barrier for 10 tasks!
19/10/23 22:07:18 INFO bsp.BSPJobClient: Running job: job_localrunner_0001
19/10/23 22:07:18 INFO Configuration.deprecation: mapred.cache.localFiles is deprecated. Instead, use mapreduce.job.cache.local.files
19/10/23 22:07:21 INFO bsp.BSPJobClient: Current supersteps number: 0
19/10/23 22:07:21 INFO bsp.BSPJobClient: The total number of supersteps: 0
19/10/23 22:07:21 INFO bsp.BSPJobClient: Counters: 7
19/10/23 22:07:21 INFO bsp.BSPJobClient: org.apache.hama.bsp.BSPPeerImpl$PeerCounter
19/10/23 22:07:21 INFO bsp.BSPJobClient: TIME_IN_SYNC_MS=112
19/10/23 22:07:21 INFO bsp.BSPJobClient: TASK_OUTPUT_RECORDS=1
19/10/23 22:07:21 INFO bsp.BSPJobClient: SUPERSTEP_SUM=10
19/10/23 22:07:21 INFO bsp.BSPJobClient: TOTAL_MESSAGES_SENT=10
19/10/23 22:07:21 INFO bsp.BSPJobClient: TOTAL_MESSAGES_RECEIVED=10
19/10/23 22:07:21 INFO bsp.BSPJobClient: org.apache.hama.bsp.JobInProgress$JobCounter
19/10/23 22:07:21 INFO bsp.BSPJobClient: LAUNCHED_TASKS=10
19/10/23 22:07:21 INFO bsp.BSPJobClient: SUPERSTEPS=0
Estimated value of PI is 3.15016
Job Finished in 3.969 seconds