hadoop流行音乐排行榜实例

前言

本实例是本人在课设上的作品,为本人原创,文中有设计思路和核心算法,若本人在理解上有问题或者代码还需优化的地方,欢迎一起讨论。

实例描述

现在在网上听歌的人越来越多,有些人总听自己常听的歌,听腻了想换新歌,却不知道去哪里找;如果有一个音乐排行榜,就可以解决这个问题。所以做一个“最受欢迎单曲”统计程序既有价值也很实用。普通的单机程序难以处理互联网上的海量数据,而我们所学的 Hadoop 分布式框架恰恰擅长处理大数据,所以用 Hadoop 的 MapReduce 来解决最受欢迎单曲统计这个问题再合适不过了。

业务逻辑

  1. 输入n个文件
  2. 读取n个文件内容
  3. 统计每个单曲的数量
  4. 比较每个单曲数量,得出排名

hadoop流行音乐排行榜实例_第1张图片

难点

  1. 歌曲名可能由多个单词组成,不能像 WordCount 那样按空格分词,需要重新设计如何从文件中切分出每首歌曲(本例用 `//` 作为歌曲名的分隔符)
  2. 对于生成的文件,如何进行排序
/*
*@name class:MusicLine.java
*@function 实现分割文本文件歌曲
*/

package musicrank.tiger.com;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/**
 * Parses one line of an input file and exposes the song it names as a
 * map-output (key, value) pair.
 *
 * <p>A song title is terminated by the delimiter {@code //}. Only the text
 * before the first delimiter is used; surrounding whitespace is stripped so
 * that the same title always produces the same key. A line that is
 * {@code null}, empty, or has no title before the first delimiter is marked
 * invalid instead of raising an exception.
 */
public class MusicLine {

    /** Delimiter that terminates a song title in the input files. */
    private static final String DELIMITER = "//";

    /** Song title parsed from the line; stays {@code null} when the line is invalid. */
    private String music;

    /** Constant count of 1 emitted for every occurrence of a song. */
    private final IntWritable one = new IntWritable(1);

    /** Flag telling whether the line contained a usable song title. */
    private boolean right = true;

    /**
     * Parses a single line.
     *
     * @param musicLine raw line text; may be {@code null} or empty
     */
    public MusicLine(String musicLine) {
        if (musicLine == null || musicLine.isEmpty()) {
            this.right = false;
            return;
        }
        // indexOf/substring instead of split(): split("//") drops trailing empty
        // strings, so a delimiter-only line such as "//" returns an empty array
        // and indexing element 0 would throw ArrayIndexOutOfBoundsException.
        int end = musicLine.indexOf(DELIMITER);
        String title = (end < 0 ? musicLine : musicLine.substring(0, end)).trim();
        if (title.isEmpty()) {
            this.right = false;
            return;
        }
        this.music = title;
    }

    /**
     * @return {@code true} when the line yielded a song title
     */
    public boolean isRight() {
        return right;
    }

    /**
     * Builds the map-output key. Call only when {@link #isRight()} is
     * {@code true}; for an invalid line the title is {@code null}.
     *
     * @return a new {@link Text} holding the song title
     */
    public Text getMusicCountMapOutKey() {
        return new Text(this.music);
    }

    /**
     * Returns the map-output value.
     *
     * @return the constant count 1
     */
    public IntWritable getMusicCountMapOutValue() {
        return this.one;
    }
}
/*
*@name class:MusicMapper.java
*@function 将key和value写入map(key,value)
*/

package musicrank.tiger.com;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// 创建一个 WordMapper类 继承于 Mapper抽象类
public class MusicMapper extends Mapper<Object, Text, Text, IntWritable> {
     
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    // Mapper抽象类的核心方法,三个参数
    public void map(Object key, // 首字符偏移量
            Text value, // 文件的一行内容
            Context context) // Mapper端的上下文,与 OutputCollector 和 Reporter 的功能类似
            throws IOException, InterruptedException {
        /*
         * StringTokenizer itr = new StringTokenizer(value.toString()); while
         * (itr.hasMoreTokens()) { word.set(itr.nextToken());
         * context.write(word, one);
         */
        MusicLine musicLine = new MusicLine(value.toString());
        if (musicLine.isRight()) {
            context.write(musicLine.getMusicCountMapOutKey(), musicLine.getMusicCountMapOutValue());
        }

    }
}
/*
*@name class:MusicReducer.java
*@function 遍历values,累加同一个key(歌曲名)对应的所有value,得到该歌曲的出现次数,然后通过context.write写出
*/

package musicrank.tiger.com;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// 创建一个 WordReducer类 继承于 Reducer抽象类
public class MusicReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
     
    private IntWritable result = new IntWritable(); // 用于记录 key 的最终的词频数

    // Reducer抽象类的核心方法,三个参数
    public void reduce(Text key, // Map端 输出的 key 值
            Iterable values, // Map端 输出的 Value 集合(相同key的集合)
            Context context) // Reduce 端的上下文,与 OutputCollector 和 Reporter 的功能类似
            throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) // 遍历 values集合,并把值相加
            {
                sum += val.get();
            }
            result.set(sum); // 得到最终词频数
            context.write(key, result); // 写入结果
    }
}
/*
*@name class:MusicMapSort.java 
*@function 继承于IntWritable.Comparator,使排序作业按key(歌曲出现次数)降序排列
*/

package musicrank.tiger.com;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class MusicMapSort extends IntWritable.Comparator{
     
    public int compare(WritableComparator a,WritableComparator b) {
        return -super.compare(a, b);
    }
    public int compare(byte[]b1,int s1,int l1,byte[]b2,int s2,int l2) {
        return -super.compare(b1, s1, l1, b2, s2, l2);
    }
}
/*
*@name class:MusicMain.java
*@function 主函数:先运行统计作业,再运行排序作业,输出歌曲排行榜
*/

package musicrank.tiger.com;

import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.InverseMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Driver of the music ranking program.
 *
 * <p>Runs two jobs in sequence: a counting job that writes
 * {@code (song, count)} records as a sequence file into a temporary
 * directory, and a sort job that inverts the records to {@code (count, song)}
 * and sorts them with {@link MusicMapSort} into the final output directory.
 */
public class MusicMain {

    /**
     * Program entry point.
     *
     * @param args generic Hadoop options followed by the input path and the
     *             output path
     * @throws Exception if configuring or running a job fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Generic options are applied to conf; what remains must be <in> <out>.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: musicrank <in> <out>");
            System.exit(2);
        }

        // Intermediate output of the counting job, consumed by the sort job.
        Path tempDir = new Path("wordcount-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

        boolean success = false;
        try {
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(MusicMain.class);
            job.setMapperClass(MusicMapper.class);
            job.setCombinerClass(MusicReducer.class);
            job.setReducerClass(MusicReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, tempDir);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);

            if (job.waitForCompletion(true)) {
                Job sortJob = Job.getInstance(conf, "sort");
                sortJob.setJarByClass(MusicMain.class);

                FileInputFormat.addInputPath(sortJob, tempDir);
                sortJob.setInputFormatClass(SequenceFileInputFormat.class);

                // Swap key and value so the records are sorted by count.
                sortJob.setMapperClass(InverseMapper.class);
                // A single reduce task keeps the whole ranking in one file.
                sortJob.setNumReduceTasks(1);
                FileOutputFormat.setOutputPath(sortJob, new Path(otherArgs[1]));

                sortJob.setOutputKeyClass(IntWritable.class);
                sortJob.setOutputValueClass(Text.class);
                sortJob.setSortComparatorClass(MusicMapSort.class);

                success = sortJob.waitForCompletion(true);
            }
        } finally {
            // Remove the intermediate data here, before exiting: a System.exit
            // inside the try block would end the JVM without running this.
            FileSystem.get(conf).delete(tempDir, true);
        }

        // Non-zero status when either job failed.
        System.exit(success ? 0 : 1);
    }
}

测试用例

music1.txt

As long as you love me//
Baby//
Love Yourself//
Sorry//
My Boyfriend Is Gay//
Sugar//
Cold//
Let Me Love You//
Feeling U//
Stronger//Faded//
I Am You//
Time Machine//
What Are Words//
Black Black Heart//
Sketch Plane//
We Are One//
Boyfriend//
Animals//Animals//
Booty Music//
Booty Music//
Booty Music//
Booty Music//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Let Her Go//La La La//Walk Away//
Baby//
Baby//
Baby//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Baby//
Love Yourself//
Sorry//
My Boyfriend Is Gay//
Sugar//
Cold//
Let Me Love You//
Feeling U//
Stronger//
Faded//
I Am You//
Time Machine//
What Are Words//
Black Black Heart//
Sketch Plane//
We Are One//
Boyfriend//
Animals//
Animals//
Booty Music//
Booty Music//
Booty Music//
Booty Music//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Let Her Go//
La La La//
Walk Away//
Baby//
Baby//
Baby//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Free Loop//
Top Of The World//
Solo Dance//
Until You//
Numb//
Numb//
Numb//
Numb//
Numb//
Numb//
Numb//
Numb//
Communication//
I just Wanna Run

music2.txt

As long as you love me//
Baby//
Love Yourself//
Sorry//
My Boyfriend Is Gay//
Sugar//
Cold//
Let Me Love You//
Feeling U//
Stronger//Faded//
I Am You//
Time Machine//
What Are Words//
Black Black Heart//
Sketch Plane//
We Are One//
Boyfriend//
Animals//Animals//
Booty Music//
Booty Music//
Booty Music//
Booty Music//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Let Her Go//La La La//Walk Away//
Baby//
Baby//
Baby//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Baby//
Love Yourself//
Sorry//
My Boyfriend Is Gay//
Sugar//
Cold//
Let Me Love You//
Feeling U//
Stronger//
Faded//
I Am You//
Time Machine//
What Are Words//
Black Black Heart//
Sketch Plane//
We Are One//
Boyfriend//
Animals//
Animals//
Booty Music//
Booty Music//
Booty Music//
Booty Music//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Let Her Go//
La La La//
Walk Away//
Baby//
Baby//
Baby//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
Love Yourself//
As long as you love me//
As long as you love me//
As long as you love me//
As long as you love me//
Black Black Heart//
Sketch Plane//
We Are One//
Boyfriend//
Animals//
Animals//
Booty Music

结果

hadoop流行音乐排行榜实例_第2张图片

你可能感兴趣的:(大数据,hadoop,大数据,分布式)