Hadoop 2.2 programming: a simple implementation of matrix multiplication

/*
matrix-matrix multiplication on Hadoop

A x B = C
Constraint: A, B, and C must all be square matrices of the same size.

I use this program to evaluate the efficiency of Hadoop for matrix
multiplication, so I do not bother to handle non-square matrices.

=== Data preparation ===
Matrix data must be stored in a file on HDFS, one row per line, with the
row number prepended to each line. For example, the following represents
a 4x4 matrix:

0 18 20 16 14
1 17 12 11 19
2 10 17 11 19
3 14 17 20 10

The left matrix (A in this example) must be stored in a file named "left",
and the right matrix (B) in a file named "right"; the file name is how the
program tells the two inputs apart.

Place "left" and "right" in the same folder (let's call it "input").
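
To upload the input files to HDFS (a sketch; the folder layout is
illustrative, and it assumes the "hdfs" client is on your PATH):

> hdfs dfs -mkdir -p input
> hdfs dfs -put left right input/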

=== Run the program ===
> hadoop jar matrixmul.jar MatrixMul input output 8 2

Results will be placed in the "output" folder on HDFS.
8: all matrices are 8x8
2: every partitioned block is of size 2x2

=== Read the results ===
Given the sample command above, we multiply two 8x8 matrices in 2x2
blocks, so the resulting C matrix has 4x4 = 16 blocks.

In the output folder there will be 16 separate files:
part-r-00000, part-r-00001, ..., part-r-00015

Every file stores one block of C; in this example, every block has 2 rows
and 2 columns. The files are organized in row-major order.
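
To inspect one block, for example the top-left one (a sketch):

> hdfs dfs -cat output/part-r-00000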

=== Algorithm ===
Mappers read the input data, split every row into blocks, and send each
block to the reducers that need it: a block of A in block-row i is sent to
the reducers of all C blocks in block-row i, and a block of B in
block-column j to the reducers of all C blocks in block-column j. Every
reducer is responsible for exactly one block of the resulting matrix: it
assembles a row strip of A and a column strip of B in reduce(), then
multiplies them in cleanup().
*/

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MatrixMul {

public static class MyMapper extends Mapper<LongWritable, Text, IntWritable, Text> {

    private String filename = null;
    private boolean isLeftMatrix = false;
    private int totalSize, partSize, npart;

    private boolean isLeft() { return isLeftMatrix; }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Use the input file's name to tell the left matrix from the right one.
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        filename = fileSplit.getPath().getName();
        isLeftMatrix = "left".equalsIgnoreCase(filename);

        // Get matrix size and partition information from the job configuration.
        Configuration conf = context.getConfiguration();
        totalSize = conf.getInt("matrix-mul-totalsize", -1);
        partSize = conf.getInt("matrix-mul-partsize", -1);
        npart = conf.getInt("matrix-mul-npart", -1);
        if (totalSize < 0 || partSize < 0 || npart < 0) {
            System.out.println("Error in setup of MyMapper.");
            System.exit(1);
        }
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] strs = line.split(" ");
        if (strs.length != totalSize + 1) {
            System.out.println("Error in map of MyMapper.");
            System.out.println(strs.length + "___" + totalSize);
            System.out.println("line is: " + line);
            System.exit(1);
        }
        int linenum = Integer.parseInt(strs[0]);
        int part_hor = linenum / partSize; // horizontal (row) block id
        int prev_part_ver = -1;
        String msg = null;
        for (int i = 0; i < totalSize; i++) {
            int part_ver = i / partSize; // vertical (column) block id
            if (part_ver != prev_part_ver) {
                if (msg != null) { // flush the previous block
                    int baselinenum = part_hor * partSize;
                    int old = part_ver;
                    part_ver = prev_part_ver;
                    if (isLeft()) {
                        // A's block (part_hor, part_ver) is needed by every
                        // C block in block-row part_hor.
                        String toSend = "l:" + (linenum - baselinenum) + ":" + part_ver + "#" + msg;
                        System.out.println("left " + linenum + "," + part_ver + " " + msg);
                        for (int k = 0; k < npart; k++) {
                            int dest = part_hor * npart + k;
                            context.write(new IntWritable(dest), new Text(toSend));
                        }
                    } else {
                        // B's block (part_hor, part_ver) is needed by every
                        // C block in block-column part_ver.
                        String toSend = "r:" + (linenum - baselinenum) + ":" + part_hor + "#" + msg;
                        System.out.println("right " + part_ver + ":" + linenum + " " + msg);
                        for (int k = 0; k < npart; k++) {
                            int dest = k * npart + part_ver;
                            context.write(new IntWritable(dest), new Text(toSend));
                        }
                    }
                    part_ver = old;
                }
                msg = null;
                prev_part_ver = part_ver;
            }
            if (msg == null)
                msg = "" + strs[i + 1];
            else
                msg += " " + strs[i + 1];
        }
        if (msg != null) { // flush the last block; almost the same code
            int part_ver = npart - 1;
            int baselinenum = part_hor * partSize;
            if (isLeft()) {
                String toSend = "l:" + (linenum - baselinenum) + ":" + part_ver + "#" + msg;
                System.out.println("left " + linenum + "," + part_ver + " " + msg);
                for (int k = 0; k < npart; k++) {
                    int dest = part_hor * npart + k;
                    context.write(new IntWritable(dest), new Text(toSend));
                }
            } else {
                String toSend = "r:" + (linenum - baselinenum) + ":" + part_hor + "#" + msg;
                System.out.println("right " + part_ver + ":" + linenum + " " + msg);
                for (int k = 0; k < npart; k++) {
                    int dest = k * npart + part_ver; // has to be the last block column
                    context.write(new IntWritable(dest), new Text(toSend));
                }
            }
        }
    }
}
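
/*
 * A worked example of the intermediate records, derived from the mapper
 * above for the 4x4 sample matrix with part-size 2 (so npart = 2):
 * row "3 14 17 20 10" of file "left" (part_hor = 1, row offset 1) is sent
 * to reducers 2 and 3 as "l:1:0#14 17" and "l:1:1#20 10"; the same row in
 * file "right" is sent to reducers 0 and 2 as "r:1:1#14 17" and to
 * reducers 1 and 3 as "r:1:1#20 10".
 */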

public static class MyReducer extends Reducer<IntWritable, Text, Text, Text> {

    private int totalSize, partSize, npart;
    int[][] left = null;  // row strip of A needed by this C block
    int[][] right = null; // column strip of B needed by this C block

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Get matrix size and partition information from the job configuration.
        Configuration conf = context.getConfiguration();
        totalSize = conf.getInt("matrix-mul-totalsize", -1);
        partSize = conf.getInt("matrix-mul-partsize", -1);
        npart = conf.getInt("matrix-mul-npart", -1);
        if (totalSize < 0 || partSize < 0 || npart < 0) {
            System.out.println("Error in setup of MyReducer.");
            System.exit(1);
        }
        left = new int[partSize][totalSize];
        right = new int[totalSize][partSize];
    }

    @Override
    public void reduce(IntWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Each value carries one block row: "l|r:<row offset>:<block index>#<numbers>".
        for (Text val : values) {
            String line = val.toString();
            String[] meta_val = line.split("#");
            String[] metas = meta_val[0].split(":");
            String[] numbers = meta_val[1].split(" ");

            int baselinenum = Integer.parseInt(metas[1]); // row offset within the block
            int blkindex = Integer.parseInt(metas[2]);    // block index along the strip
            if ("l".equalsIgnoreCase(metas[0])) { // from the left matrix
                int start = blkindex * partSize;
                for (int i = 0; i < partSize; i++)
                    left[baselinenum][start + i] = Integer.parseInt(numbers[i]);
            } else {                              // from the right matrix
                int rowindex = blkindex * partSize + baselinenum;
                for (int i = 0; i < partSize; i++)
                    right[rowindex][i] = Integer.parseInt(numbers[i]);
            }
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // The strips are complete; now do the actual block multiplication.
        int[][] res = new int[partSize][partSize];
        for (int i = 0; i < partSize; i++)
            for (int j = 0; j < partSize; j++)
                res[i][j] = 0;
        for (int i = 0; i < partSize; i++)
            for (int k = 0; k < totalSize; k++)
                for (int j = 0; j < partSize; j++)
                    res[i][j] += left[i][k] * right[k][j];
        // Emit the block, one row of numbers per line.
        for (int i = 0; i < partSize; i++) {
            String output = null;
            for (int j = 0; j < partSize; j++) {
                if (output == null)
                    output = "" + res[i][j];
                else
                    output += " " + res[i][j];
            }
            context.write(new Text(output), null);
        }
    }
}
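
/*
 * Note: this job relies on each reduce task receiving exactly one key.
 * With npart*npart reduce tasks and the default HashPartitioner, an
 * IntWritable key k (whose hashCode() is k itself) goes to task
 * k % (npart*npart) = k, so file part-r-000NN holds exactly block NN of C.
 */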

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length != 4) {
        System.err.println("Usage: MatrixMul input-dir output-dir total-size part-size");
        System.exit(2);
    }
    int totalsize = Integer.parseInt(args[2]);
    int partsize = Integer.parseInt(args[3]);
    if (totalsize <= 0 || partsize <= 0 || partsize > totalsize) {
        System.out.println("Invalid total-size or part-size");
        System.exit(1);
    }
    conf.setInt("matrix-mul-totalsize", totalsize); // the matrix is 'totalsize' by 'totalsize'
    conf.setInt("matrix-mul-partsize", partsize);   // every block is 'partsize' by 'partsize'
    int npart = totalsize / partsize;
    if (npart * partsize < totalsize)
        npart++;
    conf.setInt("matrix-mul-npart", npart); // number of blocks along one dimension

    Job job = Job.getInstance(conf, "matrix-mul");
    job.setJarByClass(MatrixMul.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(npart * npart); // one reducer per block of C

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    TextInputFormat.addInputPath(job, new Path(args[0])); // every record is a complete line
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
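
To compile and package the job (a sketch; it assumes a Hadoop 2.2 client
installation with "hadoop" on the PATH, and nothing beyond this single
source file):

> javac -cp `hadoop classpath` MatrixMul.java
> jar cf matrixmul.jar MatrixMul*.class
> hadoop jar matrixmul.jar MatrixMul input output 8 2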

 
