Data-Intensive Text Processing with MapReduce - Graph Algorithms

Graph Algorithms

Parallel Breadth-First Search

Dijkstra's Algorithm


[Figure 1]

The MapReduce algorithm:

Main idea: the computation spreads outward from the source like a ripple. Each MapReduce iteration extends the frontier by one hop and computes the shortest distance found so far to every node within that range.
When every edge weight is 1, the iteration terminates once growing the frontier no longer reaches any new node (the reached-node count stops increasing).
When edge weights are not all 1, the naive version iterates (number of nodes - 1) times; the revised version iterates until the shortest distance to every node stops changing.
Within each iteration:
The map input is (node id, [current shortest distance, adjacency list]); the map output is the node's own (node id, [current shortest distance, adjacency list]) plus, for every neighbor, (neighbor id, distance to that neighbor via this node).
The reduce input is the map output. Iterating over the values, the plain distance messages give the candidate distances to the key node via its in-neighbors, from which the current shortest distance is taken; the value of the form [current shortest distance, adjacency list] supplies the graph structure. The reducer finally emits (node id, [current shortest distance, adjacency list]).
The approach is also explained at this link: http://www.zhizhihu.com/html/y2012/3928.html
[Figure 2]
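
To make the iteration concrete, here is a hand trace (not program output) of the first round on the unit-weight input below. Only node 1 has a finite distance, so its mapper emits its own record plus one distance message per neighbor, while every other node simply passes its record through:

map emits:     (1, [0, {2=1, 3=1}])   (2, [1])   (3, [1])   (2, [1000, {4=1, 5=1}])   ...   (10, [1000, {2=1, 9=1, 8=1}])
reduce, key 2: sees [1000, {4=1, 5=1}] plus the message [1], takes the minimum, and writes 2	1	{4=1, 5=1}, incrementing the REACHED and CHANGED counters defined further below.

After three rounds every node has been reached (for example the distances to nodes 8 and 9 are both 3), and a fourth round confirms that the reached count no longer grows, which terminates the loop.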


Code

InputData:
linkWeight = 1
1	0	{2=1, 3=1}
2	1000	{4=1, 5=1}
3	1000	{10=1, 6=1, 7=1}
4	1000	{5=1, 6=1, 3=1}
5	1000	{8=1}
6	1000	{9=1, 8=1, 7=1}
7	1000	{8=1}
8	1000	{9=1}
9	1000	{5=1}
10	1000	{2=1, 9=1, 8=1}

linkWeight != 1
1	0	{2=6, 3=3, 18=100, 11=1}
2	1000	{4=9, 5=3, 1=4}
3	1000	{10=3, 6=6, 7=1}
4	1000	{5=1, 6=4, 3=3}
5	1000	{8=3, 1=4}
6	1000	{9=4, 8=7, 7=4}
7	1000	{8=2, 6=2}
8	1000	{9=3, 1=3}
9	1000	{5=8}
10	1000	{2=4, 9=6, 8=4}
11	1000	{12=1}
12	1000	{13=1}
13	1000	{14=1}
14	1000	{15=1}
15	1000	{16=1}
16	1000	{17=1}
17	1000	{18=1}
18	1000	{18=0}


Edge data structure; for now it holds only the link weight, but it can be extended.
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class linkMeta implements Writable {
	private int linkWeight;

	public linkMeta() {
	}

	public linkMeta(int linkWeight) {
		this.linkWeight = linkWeight;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(linkWeight);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		linkWeight = in.readInt();
	}

	@Override
	public String toString() {
		return "" + linkWeight;
	}

	public int getLinkWeight() {
		return linkWeight;
	}

	public void setLinkWeight(int linkWeight) {
		this.linkWeight = linkWeight;
	}
}

Node data structure, used as the value in the map input. The boolean field node marks whether a record carries the full node (distance plus adjacency list) or is only a distance message emitted by a neighbor.
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;


public class Node implements WritableComparable {

	private boolean node;

	private HashMap<Long, linkMeta> adjacencyList;

	private int currentDistance;

	public Node() {

	}


	@Override
	public boolean equals(Object obj) {
		if (obj instanceof Node) {
			Node that = (Node) obj;
			return this.isNode() == that.isNode() && this.getAdjacencyList().equals(that.getAdjacencyList()) && this.getCurrentDistance() == that.getCurrentDistance();
		}
		return false;
	}

	@Override
	public int hashCode() {
		return adjacencyList.hashCode();
	}

	@Override
	public String toString() {
		if (node)
			return currentDistance + "\t" + adjacencyList.toString();
		else
			return "" + currentDistance;
	}

	@Override
	public int compareTo(Object o) {
		Node that = (Node) o;
		if (this.isNode() == that.isNode())
			return ((Integer) currentDistance).compareTo(((Node) o).getCurrentDistance());
		else if (this.isNode()) {
			return 1;
		} else {
			return -1;
		}
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeBoolean(isNode());
		// only serialize the adjacency list when this record is a full node
		if (isNode()) {
			out.writeInt(adjacencyList.size());
			for (Long aLong : adjacencyList.keySet()) {
				out.writeLong(aLong);
				adjacencyList.get(aLong).write(out);
			}
		}
		out.writeInt(currentDistance);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		node = in.readBoolean();
		// only deserialize the adjacency list when this record is a full node
		if (isNode()) {
			adjacencyList = new HashMap<Long, linkMeta>();
			int size = in.readInt();
			long key;
			for (int i = 0; i < size; i++) {
				linkMeta linkMeta = new linkMeta();
				key = in.readLong();
				linkMeta.readFields(in);
				adjacencyList.put(key, linkMeta);
			}
		}
		currentDistance = in.readInt();
	}

	public HashMap<Long, linkMeta> getAdjacencyList() {
		return adjacencyList;
	}

	public void setAdjacencyList(HashMap<Long, linkMeta> adjacencyList) {
		this.adjacencyList = adjacencyList;
	}

	public boolean isNode() {
		return node;
	}

	public void setNode(boolean node) {
		this.node = node;
	}

	public int getCurrentDistance() {
		return currentDistance;
	}

	public void setCurrentDistance(int currentDistance) {
		this.currentDistance = currentDistance;
	}

	public void set(Node value) {
		this.node = value.isNode();
		this.adjacencyList = value.getAdjacencyList();
		this.currentDistance = value.getCurrentDistance();
	}
}
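
As a quick illustration of how these fields line up with the on-disk format, the sketch below builds by hand the Node that the custom RecordReader (further below) produces for the input line "1	0	{2=1, 3=1}". The wrapper class name is made up for the example, and the print order of the adjacency list depends on HashMap.

package GraphAlgorithms.PBFS;

import java.util.HashMap;

public class NodeFormatCheck {
	public static void main(String[] args) {
		Node n = new Node();
		n.setNode(true);                          // a full node record, not a bare distance message
		n.setCurrentDistance(0);
		HashMap<Long, linkMeta> adj = new HashMap<Long, linkMeta>();
		adj.put(2L, new linkMeta(1));
		adj.put(3L, new linkMeta(1));
		n.setAdjacencyList(adj);
		System.out.println("1\t" + n);            // prints 1	0	{2=1, 3=1}, matching the input format
	}
}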

Counters
package GraphAlgorithms.PBFS;


// Counters for reached nodes, not-yet-reached nodes, and nodes whose shortest distance changed this round
public enum Finished {
	MAXDISTANCE, CHANGED, REACHED
}



Map phase
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.HashMap;

public class MyMapper extends Mapper<LongWritable, Node, LongWritable, Node> {
	private final static int MAXDISTANCE = 1000;
	private LongWritable outKey = new LongWritable();
	private Node outValue = new Node();

	@Override
	protected void map(LongWritable key, Node value, Context context) throws IOException, InterruptedException {
		int distance = value.getCurrentDistance();
		value.setNode(true);
		context.write(key, value);
//		System.out.println(key + "\t" + value);
		HashMap<Long, linkMeta> adjacencyList = value.getAdjacencyList();
		for (Long aLong : adjacencyList.keySet()) {
			// once the key node has been reached, emit a candidate distance from key to each of its neighbors (not necessarily the shortest; the minimum is taken in reduce)
			if (distance != MAXDISTANCE && aLong != key.get()) {
				outKey.set(aLong);
				outValue.setNode(false);
				int linkWeight = adjacencyList.get(aLong).getLinkWeight();
				outValue.setCurrentDistance(distance + linkWeight);
				context.write(outKey, outValue);
//				System.out.println("-----" + outKey + "\t" + outValue);
			}

		}
	}
}

Reduce phase
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyReducer extends Reducer<LongWritable, Node, LongWritable, Node> {

	private final static int MAXDISTANCE = 1000;
	private Node outValue = new Node();
	private long sourceNode;


	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		sourceNode = Long.parseLong(context.getConfiguration().get("sourceNode"));
	}

	@Override
	protected void reduce(LongWritable key, Iterable<Node> values, Context context) throws IOException, InterruptedException {
		int minDistance = MAXDISTANCE;
		int preDistance = -1;
		for (Node value : values) {
			if (value.isNode()) {
//				outValue = value;  // the value object is reused by Hadoop, so it must be copied rather than referenced directly
				outValue.set(value);
				preDistance = value.getCurrentDistance();
			} else {
				int distance = ((Node) value).getCurrentDistance();
				if (distance < minDistance) {
//					keep the smallest candidate distance to the key node
					minDistance = distance;
				}
			}
		}
		if (sourceNode != key.get())
			outValue.setCurrentDistance(minDistance);
		context.write(key, outValue);

		// if the key node is still unreached, bump the unreached counter; otherwise bump the reached counter
		if (outValue.getCurrentDistance() == MAXDISTANCE) {
			context.getCounter(Finished.MAXDISTANCE).increment(1);
		} else {
			context.getCounter(Finished.REACHED).increment(1);
		}
		// count the nodes whose current shortest distance changed in this round
		if (preDistance != outValue.getCurrentDistance()) {
			context.getCounter(Finished.CHANGED).increment(1);
		}
//		System.out.println("ReduceOut:" + key + "\t" + outValue);
	}
}



Job configuration (driver)
package GraphAlgorithms.PBFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class PBFSearch extends Configured implements Tool {

	private FileSystem fileSystem;

	@Override
	public int run(String[] args) throws Exception {
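		// NOTE: exactly one of the three strategies below should be uncommented before running;
		// as committed here, run() does nothing and simply returns 0.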
		//	lingWeightIsOne(args);
		//linkWeightIsNotOne(args);
		//linkWeightIsNotOneRevised(args);
		return 0;
	}
	private void linkWeightIsNotOneRevised(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
		Path input = new Path(args[0]);
		Path output = null;
		long changeCounter = -1;
		// keep iterating until no node's shortest distance changed in the previous round (CHANGED counter == 0)
		for (int i = 1; changeCounter != 0; i++) {
			output = new Path(args[1] + "/" + i);
			Job firstJob = oneStepRun(input, output);
			changeCounter = firstJob.getCounters().findCounter(Finished.CHANGED).getValue();
			// delete the intermediate output of the previous round
			deleteTempFile(input, i, firstJob);
			input = output;
			System.out.println(i);
		}
		// rename the final output directory
		fileSystem.rename(output, new Path(args[1] + "/out"));
	}

	private void linkWeightIsNotOne(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
		Path input = new Path(args[0]);
		Path output = null;
		long maxDistanceCounter = Long.MAX_VALUE;
		long reachedCounter = 0;
		// naive version: iterate |V| - 1 times (the loop exits once i equals REACHED + MAXDISTANCE, i.e. the total number of nodes)
		for (int i = 1; i - reachedCounter != maxDistanceCounter; i++) {
			output = new Path(args[1] + "/" + i);
			Job firstJob = oneStepRun(input, output);

			// read the counters for reached and unreached nodes
			reachedCounter = firstJob.getCounters().findCounter(Finished.REACHED).getValue();
			maxDistanceCounter = firstJob.getCounters().findCounter(Finished.MAXDISTANCE).getValue();

			// delete the intermediate output of the previous round
			deleteTempFile(input, i, firstJob);
			input = output;
			System.out.println(i);
		}
		// rename the final output directory
		fileSystem.rename(output, new Path(args[1] + "/out"));
	}

	private void lingWeightIsOne(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
		Path input = new Path(args[0]);
		Path output = null;
		long preReached = -1;
		long reachedCounter = 0;
		// keep iterating until the number of reached nodes stops changing
		for (int i = 1; preReached != reachedCounter; i++) {
			preReached = reachedCounter;
			output = new Path(args[1] + "/" + i);
			Job firstJob = oneStepRun(input, output);

			// read the counter for reached nodes
			reachedCounter = firstJob.getCounters().findCounter(Finished.REACHED).getValue();
//			System.out.println("MaxDistanceCounter = "+maxDistanceCounter);
//			System.out.println("Reached = "+ reachedCounter);

			// delete the intermediate output of the previous round
			deleteTempFile(input, i, firstJob);
			input = output;
			System.out.println(i);
		}
		// rename the final output directory
		fileSystem.rename(output, new Path(args[1] + "/out"));
	}

	private void deleteTempFile(Path input, int i, Job firstJob) throws IOException {
		if (i != 1) {
			fileSystem.delete(input, true);
		} else {
			fileSystem = FileSystem.get(firstJob.getConfiguration());
		}
	}

	private Job oneStepRun(Path input, Path output) throws IOException, InterruptedException, ClassNotFoundException {
		Configuration conf = new Configuration();
		conf.set("sourceNode", "1");
		Job job = new Job(conf, "PBFSearch");
		job.setJarByClass(PBFSearch.class);
		job.setInputFormatClass(MyInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(Node.class);
		MyInputFormat.addInputPath(job, input);
		TextOutputFormat.setOutputPath(job, output);
		job.waitForCompletion(true);
		return job;
	}

	public static void main(String[] args) throws Exception {
		int exitCode = ToolRunner.run(new PBFSearch(), args);
		System.exit(exitCode);
	}
}



Custom InputFormat, used to produce the custom RecordReader
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;

public class MyInputFormat extends FileInputFormat<LongWritable, Node> {
	@Override
	public RecordReader<LongWritable, Node> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
		return new MyRecordReader();
	}
}

Custom RecordReader, used to produce <LongWritable, Node> key-value pairs

package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

import java.io.IOException;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MyRecordReader extends RecordReader<LongWritable, Node> {
	private RecordReader<LongWritable, Text> recordReader;
	private LongWritable key;
	private Node value;

	@Override
	public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
		recordReader = new LineRecordReader();
		recordReader.initialize(split, context);
		key = new LongWritable();
		value = new Node();
	}

	@Override
	public boolean nextKeyValue() throws IOException, InterruptedException {
		boolean hasNext = recordReader.nextKeyValue();
		if (hasNext) {
			String valueText = recordReader.getCurrentValue().toString();
			String[] keyAndValues = valueText.split("\t");

			key.set(Long.parseLong(keyAndValues[0]));
			value.setCurrentDistance(Integer.parseInt(keyAndValues[1]));
			generateValue(keyAndValues[2]);
			return true;
		} else {
			return false;
		}
	}

	private void generateValue(String value) {
		HashMap<Long, linkMeta> result = new HashMap<Long, linkMeta>();
		Pattern pattern = Pattern.compile("(\\d+)=(\\d+)");
		Matcher matcher = pattern.matcher(value);
		long keyLong = -1;
		int linkWeight = -1;
		while (matcher.find()) {
			for (int i = 1; i <= matcher.groupCount(); i++) {
				switch (i) {
					case 1:
						keyLong = Long.parseLong(matcher.group(i));
						break;
					case 2:
						linkWeight = Integer.parseInt(matcher.group(i));
						break;
					default:
						System.out.println("Error in parse Regex");
						break;
				}
			}
			result.put(keyLong, new linkMeta(linkWeight));
		}
		this.value.setAdjacencyList(result);
	}


	@Override
	public LongWritable getCurrentKey() throws IOException, InterruptedException {
		return key;
	}

	@Override
	public Node getCurrentValue() throws IOException, InterruptedException {
		return value;
	}

	@Override
	public float getProgress() throws IOException, InterruptedException {
		return recordReader.getProgress();
	}

	@Override
	public void close() throws IOException {
		recordReader.close();
	}
}
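
A quick standalone check (an illustrative sketch, not part of the job; the class name is made up) of how the regular expression in generateValue pulls neighbor/weight pairs out of an adjacency-list string:

package GraphAlgorithms.PBFS;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RegexCheck {
	public static void main(String[] args) {
		Pattern pattern = Pattern.compile("(\\d+)=(\\d+)");
		Matcher matcher = pattern.matcher("{10=3, 6=6, 7=1}");
		while (matcher.find()) {
			// group(1) is the neighbor id, group(2) is the link weight
			System.out.println(matcher.group(1) + " -> " + matcher.group(2));   // 10 -> 3, 6 -> 6, 7 -> 1
		}
	}
}
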
Some problems encountered while writing the code:
1. When deserializing a HashMap (or any container), the value object must be created with new inside the loop.
2. The custom RecordReader uses a regular expression; in a Java string literal \\\\ stands for a single literal backslash in the regex, and a capture group (A) does not need to be written as \\(A\\).
3. When iterating over values with for-each in reduce, the value object is reused (each deserialization refills the same object), whereas a plain-Java for-each does not behave this way; be especially careful when keeping references in containers. Verification code follows:

Hadoop input:
aaaaaaaaaaaaaaaaaaaaaa
bbbbbbbbbbbbbbbbbbbbbb

MapReduce code:
package test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import static java.lang.System.exit;

public class HadoopTest extends Configured implements Tool {
	public static void main(String[] args) throws Exception {
		int exitCode = ToolRunner.run(new HadoopTest(), args);
		exit(exitCode);
	}

	@Override
	public int run(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = new Job(conf, "HadoopTest");
		job.setJarByClass(HadoopTest.class);
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(A.class);
		TextInputFormat.addInputPath(job,new Path(args[0]));
		TextOutputFormat.setOutputPath(job,new Path(args[1]));
		return job.waitForCompletion(true) ? 0 : 1;
	}

	public static class A implements WritableComparable{
		Text text = new Text();
		@Override
		public int compareTo(Object o) {
			return text.compareTo(((A)o).text);
		}

		@Override
		public void write(DataOutput out) throws IOException {
			text.write(out);
		}

		@Override
		public void readFields(DataInput in) throws IOException {
			text.readFields(in);
		}
	}

	public static class MyMapper extends Mapper<LongWritable,Text,LongWritable,A> {
		private  A outValue = new A();
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			outValue.text = value;
			context.write(new LongWritable(1), outValue);
		}
	}
	public static class MyReducer extends Reducer<LongWritable, A, LongWritable, A> {
		@Override
		protected void reduce(LongWritable key, Iterable<A> values, Context context) throws IOException, InterruptedException {
			for (A value : values) {
				System.out.println(value.text+" = " +value);
			}
		}
	}
}

Output printed in the reduce phase:

13/11/25 21:35:03 INFO mapred.LocalJobRunner: 
13/11/25 21:35:03 INFO mapred.Task: Task:attempt_local297038115_0001_r_000000_0 is done. And is in the process of commiting
aaaaaaaaaaaaaaaaaaaaaa = test.HadoopTest$A@24be0018
bbbbbbbbbbbbbbbbbbbbbb = test.HadoopTest$A@24be0018
13/11/25 21:35:03 INFO mapred.LocalJobRunner: 

Plain Java:
package test;

import java.io.FileNotFoundException;
import java.util.ArrayList;

public class Test {

		public static void main(String[] args) throws FileNotFoundException {
			ArrayList<A> as = new ArrayList<A>();
			as.add(new A());
			as.add(new A());
			as.add(new A());
			for (A a : as) {
				System.out.println(a);
			}
		}
		static class A {
		}
}

test.Test$A@121f34d0
test.Test$A@3ec44cfb
test.Test$A@499a12ee

Process finished with exit code 0

Conclusion

The MapReduce version is a brute-force algorithm: each iteration expands outward from the frontier, and the computation performed for nodes beyond the frontier is wasted work. Dijkstra's algorithm is more efficient by comparison, but it is constrained by available memory.
Characteristics of graph algorithms in MapReduce:
1. The graph structure is represented as adjacency lists, and each node carries additional information.
2. The map function runs over each node's local piece of the graph (its adjacency list), storing results in the node or in the list; each partial result is emitted with the id of the adjacent node as key. The reducer receives all partial results aimed at a given node (e.g. everything pointing at B) and aggregates them, typically with a sum or a min.
3. Besides the computation, the graph structure itself is passed from map to reduce, where it is updated and written back to disk (HDFS, etc.).
4. Graph algorithms are usually iterative, so a non-MapReduce driver controls termination, generally by reading Counters.
For parallel breadth-first search, the mapper computation is the current distance plus the edge distance (emitting distances to neighbors), while the reducer computation is the Min function (selecting the shortest path). In other words, the mapper computes the distance from the source to each neighbor via the current node and the reducer takes the minimum; the graph structure is unchanged and only the node state is updated.


PageRank

MapReduce

Main idea: much like single-source shortest path, except that each iteration spreads every node's probability mass over its adjacent nodes and then sums, per node, all the mass it received.


[Figure 3]

MapReduce execution flow

[Figure 4]
MapReduce pseudocode

[Figure 5]


None of the discussion above accounts for dangling nodes (nodes with out-degree 0) or the random jump (the possibility of jumping to any node). Taking both into account, the new PR value is computed as follows:

Here alpha is the random-jump probability, |G| is the total number of nodes in the network, and m is the probability mass accumulated (summed) from dangling nodes in this round (in the sample data of the Code section, the first round's dangling mass is the sum of the initial PR values of nodes 9 and 10, since their mass is not distributed to any other node).
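The formula itself appeared only as a figure; read off the driver and mapper code below (so treat this as a reconstruction rather than a quotation), the update for each node n is

    P'(n) = alpha * 1/|G| + (1 - alpha) * ( m/|G| + P(n) )

where P(n) is the mass node n accumulated in Job one. As a hand check on the sample data: with alpha = 0.05, |G| = 10, and a first-round dangling mass m = 0.1 + 0.1 = 0.2, node 2 receives P(2) = 0.1/2 = 0.05 from node 1, so P'(2) = 0.05/10 + 0.95 * (0.2/10 + 0.05) = 0.0715.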

Implementation approach

1. Two Jobs are needed, or a job chain. What follows describes and implements the two-Job version: the author prefers the new API and is on Hadoop 1.X, which only ships job chaining for the old API; the new API in YARN includes it as well. The two approaches are essentially equivalent, but chaining is more efficient.
2. Job one: compute the PR values and the dangling mass
           Map: a node with adjacent nodes splits its probability evenly among them; a node without adjacent nodes adds its current PR value to the dangling mass
           Reduce: compute the new PR value and this round's dangling mass
3. Job two: redistribute the dangling mass, apply the random jump, and count the nodes that have not yet converged at the end of the round
           Map: compute the PR value adjusted for the dangling mass and the random jump, and also read in the previous round's output so the reducer can compare the two
           Reduce: compare each node's PR value between this round and the last, and count the nodes whose change exceeds the error threshold (the DANGLES counter)
4. Stop iterating when that count is 0 or the iteration limit is reached; otherwise go back to step 2.
See the Code section for details; the comments there are reasonably thorough.

Code

Input data. Format: node <tab> initial PR value|adjacent nodes; nodes 9 and 10 are dangling nodes.
1	0.1|2 3
2	0.1|4 5 1
3	0.1|10 6 7
4	0.1|5 6 3
5	0.1|8
6	0.1|9 8 7
7	0.1|8
8	0.1|9
9	0.1|
10	0.1|
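
To make the '*' and '#' tags used by the mappers below concrete, here is the first-round map output for nodes 1 and 9, worked out by hand from MyMapper:

1	*0.1|2 3       ('*' marks the record carrying the adjacency list)
2	#0.05          ('#' marks a share of PR mass: node 1 splits 0.1 over two neighbors)
3	#0.05
9	*0.1|
-1	0.1            (node 9 is dangling, so its whole mass goes to the special key -1)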

Counters
package GraphAlgorithms.PageRank;


/**
 * Counters used by PageRank.
 */
public enum PageRankCounters {
	DANGLES,            // nodes whose PR change still exceeds the threshold (used as the convergence check)
	TOTAL               // total number of nodes
}

Job configuration (driver)
package GraphAlgorithms.PageRank;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class PageRank extends Configured implements Tool {
	public static final String damplingFile = "_DanglingSum";
	public static final double ALPHA = 0.05;
	public static final double VARIANCE = 0.00000001;
	public static final int MAXITER = 100;

	@Override
	public int run(String[] args) throws Exception {
		// i is the iteration number; input and output are the job's input/output paths
		int i = 1;
		String input = args[0];
		String output = args[1];


		while (iterOnce(input, output, i) != 0 && i < MAXITER) {
			// the input of round i+1 is the output of round i
			input = output;
			i++;
		}

		return 0;
	}

	/**
	 * Runs one full iteration: Job one computes the PR values and the dangling mass;
	 * Job two redistributes the dangling mass, applies the random jump, and returns the number
	 * of nodes that have not yet converged at the end of the round.
	 */
	private long iterOnce(String input, String output, int i) throws IOException, InterruptedException, ClassNotFoundException {
		// Input/output directory layout
		// Round 1: calculatePageRank reads the initial data (InitData) and writes OutData/1/A
		//          considerDamplingAndRandom reads OutData/1/A and writes OutData/1/B
		// Round 2: calculatePageRank reads OutData/1/B and writes OutData/2/A
		//          considerDamplingAndRandom reads OutData/2/A and writes OutData/2/B
		long totalNode = calculatePageRank(input, output, i);
		long dangling = considerDamplingAndRandom(output, input, i, totalNode);
		System.out.println("i = "+i + " , dangling = "+dangling);
		return dangling;
	}

	/**
	 * Redistributes the dangling mass, applies the random jump, and returns the number of
	 * nodes that have not yet converged this round.
	 */
	private long considerDamplingAndRandom(String inputPre, String inputLastIter, int i, long totalNode) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		Job job = new Job(conf, "PageRank2");
		job.setJarByClass(PageRank.class);

		MultipleInputs.addInputPath(job, new Path(inputPre + "/" + i + "/A"), KeyValueTextInputFormat.class,
				DamplingAndRandomMapper.class);

		if (i == 1) {
			MultipleInputs.addInputPath(job, new Path(inputLastIter), KeyValueTextInputFormat.class,
					LastIterMapper.class);
		} else {
			MultipleInputs.addInputPath(job, new Path(inputLastIter + "/" + (i - 1) + "/B"),
					KeyValueTextInputFormat.class, LastIterMapper.class);
		}

		job.setOutputFormatClass(TextOutputFormat.class);
		job.setReducerClass(FinalReducer.class);
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(Text.class);
		TextOutputFormat.setOutputPath(job, new Path(inputPre + "/" + i + "/B"));

		String danglingSum = getDanglingSum(inputPre + "/" + i + "/A", conf);

		double reg = (ALPHA + (1 - ALPHA) * (Double.parseDouble(danglingSum))) / totalNode;
		job.getConfiguration().set("totalNode", "" + totalNode);
		job.getConfiguration().set("reg", "" + reg);
		job.waitForCompletion(true);
		return job.getCounters().findCounter(PageRankCounters.DANGLES).getValue();
	}
	/**
	 * Reads this round's dangling mass from the side file written by MyReducer's cleanup().
	 */
	private String getDanglingSum(String input, Configuration conf) throws IOException {
		FileSystem fileSystem = FileSystem.get(conf);
		Path inputPath = new Path(input + "/_DanglingSum");
		FSDataInputStream inputStream = fileSystem.open(inputPath);
		byte[] buff = new byte[100];
		int k = inputStream.read(buff);
		IOUtils.closeStream(inputStream);
		return new String(buff, 0, k);
	}

	/**
	 * Computes the PR values and the dangling mass (Job one).
	 */
	private long calculatePageRank(String input, String output, int i) throws IOException, InterruptedException, ClassNotFoundException {
		Configuration conf = new Configuration();
		conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
		Job job = new Job(conf, "PageRank");
		job.setJarByClass(PageRank.class);
		job.setInputFormatClass(KeyValueTextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(Text.class);
		if (i == 1) {
			// on the first iteration, read the initial input directory as supplied
			KeyValueTextInputFormat.addInputPath(job, new Path(input + "/"));
		} else {
			// after the first iteration, the input is the final output of the previous round
			KeyValueTextInputFormat.addInputPath(job, new Path(input + "/" + (i - 1) + "/B"));
		}
		TextOutputFormat.setOutputPath(job, new Path(output + "/" + i + "/A"));
		job.waitForCompletion(true);

		return job.getCounters().findCounter(PageRankCounters.TOTAL).getValue();
	}

	public static void main(String[] args) throws Exception {
		int exitCode = ToolRunner.run(new PageRank(), args);
		System.exit(exitCode);
	}
}

MyMapper:
package GraphAlgorithms.PageRank;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class MyMapper extends Mapper<Text, Text, LongWritable, Text> {
	private double pagerankValue;
	private LongWritable outKey = new LongWritable();
	private Text outValue = new Text();

	@Override
	protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {

		String[] line = value.toString().trim().split("\\|");
		pagerankValue = Double.parseDouble(line[0]);

		// pass the adjacency list through to the reducer; '*' marks a value that carries the adjacency list
		outKey.set(Long.parseLong(key.toString()));
		outValue.set("*"+value.toString());
		context.write(outKey, outValue);


		if (line.length == 1) {
			// empty adjacency list: this is a dangling node, so set the key to -1 and emit the current PR value
			outKey.set(-1);
			outValue.set("" + pagerankValue);
			context.write(outKey, outValue);
		} else {
			// non-empty adjacency list: split the current PR value evenly among the adjacent nodes ('#' marks an even share of PR mass)
			String[] nodes = line[1].trim().split(" ");
			double avgPR = pagerankValue / nodes.length;
			for (String node : nodes) {
				outKey.set(Long.parseLong(node));
				outValue.set("#" + avgPR);
				context.write(outKey, outValue);
			}
		}
	}
}

MyReducer. The cleanup method was originally flagged as a possible BUG to fix later; on reflection it is actually fine, because every dangling-mass record has the key -1 and therefore always lands in a single reducer.
package GraphAlgorithms.PageRank;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


import java.io.IOException;


public class MyReducer extends Reducer<LongWritable, Text, LongWritable, Text> {
	private Text outKey = new Text();
	private Text outValue = new Text();
	private String danglingSum = null;




	@Override
	protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
		String sKey = key.toString();
		if (sKey.equals("-1")) {
			// accumulate the dangling mass
			double sum = 0;
			for (Text value : values) {
				sum += Double.parseDouble(value.toString());
			}
			danglingSum = "" + sum;
		} else {
			double currentPRValue = 0;
			String sValue;
			String adjacencyList = "";


			// update the PR value and write the adjacency list back out as the next round's input
			for (Text value : values) {
				sValue = value.toString();
				if (sValue.startsWith("*")) {
					String[] line = value.toString().trim().split("\\|");
					if (line.length == 2) {
						adjacencyList = line[1];
					}
				} else {
					currentPRValue += Double.parseDouble(sValue.substring(1));
				}
			}
			context.getCounter(PageRankCounters.TOTAL).increment(1);
			outValue.set(currentPRValue + "|" + adjacencyList);
			context.write(key, outValue);
		}


	}


	@Override
	protected void cleanup(Context context) throws IOException, InterruptedException {
		if (danglingSum != null) {
			// write this round's dangling mass to a side file; note: this is fine with a single reducer, but with multiple reducers MultipleOutputs should be used and the value written from reduce() instead
			Configuration conf = context.getConfiguration();
			FileSystem fileSystem = FileSystem.get(conf);
			Path outDanglingPath = new Path(conf.get("mapred.output.dir") + "/"+PageRank.damplingFile);
			FSDataOutputStream outputStream = fileSystem.create(outDanglingPath);
			outputStream.write(danglingSum.getBytes("utf-8"));
			IOUtils.closeStream(outputStream);
		}
	}
}

LastIterMapper
package GraphAlgorithms.PageRank;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class LastIterMapper extends Mapper<Text,Text,LongWritable,Text> {
	private Text outValue = new Text();
	private LongWritable outKey = new LongWritable();
	@Override
	protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
		// extract the previous round's PR value and tag it with '*' for the reducer
		String[] line = value.toString().trim().split("\\|");
		outValue.set("*" + Double.parseDouble(line[0]));
		outKey.set(Long.parseLong(key.toString()));
		context.write(outKey,outValue);
	}
}

DamplingAndRandomMapper
package GraphAlgorithms.PageRank;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class DamplingAndRandomMapper extends Mapper<Text,Text,LongWritable,Text> {
	private double totalNode;
	private double reg;
	private LongWritable outKey = new LongWritable();
	private Text outValue = new Text();

	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		totalNode = Double.parseDouble(context.getConfiguration().get("totalNode"));
		reg = Double.parseDouble(context.getConfiguration().get("reg"));
	}

	@Override
	protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
		// adjust the PR value for the dangling mass and the random jump
		String[] line = value.toString().trim().split("\\|");
		double prePRValue = Double.parseDouble(line[0]);
		double currentPRValue = reg+(1-PageRank.ALPHA)*prePRValue;
		outKey.set(Long.parseLong(key.toString()));
		outValue.set(currentPRValue + (line.length == 2 ? "|" + line[1] : "|"));
		context.write(outKey, outValue);
	}
}

FinalReducer
package GraphAlgorithms.PageRank;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FinalReducer extends Reducer<LongWritable,Text,LongWritable,Text> {
	@Override
	protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
		// compare this round's PR value with the previous round's
		double prePRV = 0,currPRV = 0;
		for (Text value : values) {
			String sValue = value.toString();
			if (sValue.startsWith("*")) {
				prePRV = Double.parseDouble(sValue.substring(1));
			} else {
				context.write(key,value);
				currPRV = Double.parseDouble(sValue.substring(0, sValue.indexOf('|')));
			}
		}
		if (Math.abs(currPRV - prePRV) >= PageRank.VARIANCE) {
			context.getCounter(PageRankCounters.DANGLES).increment(1);
		}
	}
}
Problems encountered while writing the code:
1. PageRank does not use a custom data structure (the single-source shortest-path code does); everything is handled as Text. In hindsight a custom data structure would have been more convenient and the code clearer.
2. The hand-off of input and output paths between iterations is easy to get wrong.
3. A value (here danglingSum) cannot be set on the Configuration inside map or reduce and then read back by the driver after the Job completes. (Different map or reduce tasks would set conflicting values, and the Configuration objects inside the tasks are separate copies rather than the driver's object.) The value is therefore saved by writing it to a file, which the driver reads once the Job has finished.

Conclusion

1. Many termination conditions are possible: the ranking by PR value no longer changes, the change in PR values falls below a threshold, a maximum number of iterations is reached, and so on.
2. There are likewise several ways to handle dangling nodes, and the random-jump probability alpha is not fixed either.

Issues With Graph Processing

1. A single-machine algorithm can usually keep a global data structure in memory, which MapReduce cannot do. Large sparse graphs are normally stored as adjacency lists, so a node can only communicate with its adjacent nodes, i.e. with its local piece of the graph.
2. The limitation of these graph algorithms is that each node performs a local computation and passes the result to its adjacent nodes. The amount of intermediate data depends on the number of edges, so the MapReduce approach suits sparse graphs but not large dense graphs; on dense graphs the dominant cost may be shipping intermediate data between machines, in the worst case quadratic in the number of nodes.
3. Combiners and the in-mapper combining pattern reduce running time; min and sum qualify because they are commutative and associative.
4. A good data layout greatly improves combiner effectiveness, for example sorting the data by some attribute: a social network by school, web pages by language or by domain. Sorting is a strength of MapReduce.
5. In a very large network every node's probability is tiny and may fall outside the range a float can represent, so one remedy is to work with log probabilities, and there are several ways to take the log, e.g. log(1 + x); see the sketch below.
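
For item 5, here is a minimal sketch (not from the original post) of adding two probabilities that are stored as log values, using the standard log-sum-exp trick so that very small values never leave log space:

public class LogProb {
	// returns log(exp(logA) + exp(logB)) computed entirely in log space
	static double logSum(double logA, double logB) {
		double hi = Math.max(logA, logB);
		double lo = Math.min(logA, logB);
		return hi + Math.log1p(Math.exp(lo - hi));   // log1p(x) = log(1 + x)
	}

	public static void main(String[] args) {
		double s = logSum(Math.log(0.25), Math.log(0.5));
		System.out.println(Math.exp(s));             // 0.75 (up to floating-point rounding)
	}
}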


Summary

This chapter covers graph algorithms in MapReduce, discussing in detail parallel breadth-first search and PageRank. Both are instances of a large class of iterative algorithms that share the following characteristics:
 1.The graph structure is represented with adjacency lists.
 2.Algorithms map over nodes and pass partial results to nodes on their adjacency lists. Partial results are aggregated for each node in the reducer.
 3.The graph structure itself is passed from the mapper to the reducer, such that the output is in the same form as the input.
 4.Algorithms are iterative and under the control of a non-MapReduce driver program, which checks for termination at the end of each iteration.
The MapReduce programming model does not provide a mechanism to maintain global data structures accessible and mutable by all the mappers and reducers. One implication of this is that communication between pairs of arbitrary nodes is difficult to accomplish. Instead, information typically propagates along graph edges, which gives rise to the structure of algorithms discussed above.




