目前开源的 MapReduce 实现主要是 Hadoop,它是用 Java 写的;而 Google 自己使用的实现是用 C++ 写的,Java 的东西在性能方面还是比不上 C/C++。我从网上找了找,C/C++ 的开源 MapReduce 实现有下面两个,但功能都还比较弱,没有实现分布式,只能用来学习原理:
http://mapreduce.stanford.edu/
http://labs.trolltech.com/page/Projects/Threads/QtConcurrent
下面主要分析分析 Phoenix
Phoenix 的做法是:当硬件上有多个 CPU 或多核 CPU 时,在每个 CPU 上绑定一个线程,这些线程先执行 map,之后再对生成的中间结果执行 reduce。它只是 MapReduce 原理的简单展示,实际使用价值不大,但原理实现得比较直观。
/*
 * Arguments the caller fills in and passes to map_reduce(): the input data,
 * the user-supplied callbacks, and optional tuning parameters.
 * Optional fields must be zero when they are not used.
 */
typedef struct
{
void * task_data; /* The data to run MapReduce on.
* If splitter is NULL, this should be an array. */
off_t data_size; /* Total # of bytes of data */
int unit_size; /* # of bytes for one element
* (if necessary, on average) */
map_t map; /* Map function pointer, must be user defined */
reduce_t reduce; /* If NULL, identity reduce function is used,
* which emits a keyval pair for each val. */
combiner_t combiner; /* If NULL, no combiner would be called. */
splitter_t splitter; /* If NULL, the array splitter is used.*/
locator_t locator; /* If NULL, no locality based optimization is
performed. */
key_cmp_t key_cmp; /* Key comparison function.
Must be user defined.*/
final_data_t *result; /* Pointer to output data.
* Must be allocated by user */
/*** Optional arguments must be zero if not used ***/
partition_t partition; /* Default partition function is a
* hash function */
/* Creates one emit queue for each reduce task,
* instead of per reduce thread. This improves
* time to emit if data is emitted in order,
* but can increase merge time. */
bool use_one_queue_per_task;
int L1_cache_size; /* Size of L1 cache in bytes */
int num_map_threads; /* # of threads to run map tasks on.
* Default is one per processor */
int num_reduce_threads; /* # of threads to run reduce tasks on.
* Default is one per processor */
int num_merge_threads; /* # of threads to run merge tasks on.
* Default is one per processor */
int num_procs; /* Maximum number of processors to use. */
int proc_offset; /* number of procs to skip for thread binding */
/* (useful if you have multiple MR's running
* and you don't want them binding to the same
* hardware thread). */
float key_match_factor; /* Magic number that describes the ratio of
* the input data size to the output data size.
* This is used as a hint. */
} map_reduce_args_t;
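这个结构体是初始化时传递参数用的。其中 map、reduce、combiner、splitter、locator、key_cmp 等回调函数(原文用红色标出)需要针对不同的应用自己实现,在 mapreduce 的过程中被调用;num_map_threads、num_reduce_threads、num_merge_threads(原文用蓝色标出)表示不同执行过程的线程数。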
/*
 * Entry point of the runtime: builds the execution environment from the
 * caller's arguments and then runs the map, reduce and merge phases in order.
 *
 * args: caller-filled argument block.
 *
 * Returns 0 once all three phases have run, or -1 if args is NULL or the
 * environment could not be created.
 */
int map_reduce (map_reduce_args_t * args)
{
    mr_env_t *env;

    if (args == NULL) {
        return -1;
    }

    env = env_init (args);
    if (env == NULL) {
        return -1;
    }

    map (env);
    reduce (env);
    merge (env);

    /* NOTE(review): env is not released in this excerpt - confirm whether a
     * matching teardown routine exists and should be called here. */
    return 0;
}
map_reduce ()这个函数中主要是根据输入的参数分别调用map、reduce、merge三个函数,条理比较清楚
mr_env_t这个结构体比较重要,所有的中间结果,以及map、reduce、merge三个过程调用的线程信息,全部在这里面
/*
 * Runtime environment handed to the map, reduce and merge phases.
 * Holds the derived parameters, the callbacks, a pointer back to the
 * caller's arguments, the intermediate/final result arrays, the task
 * queue and the thread pool.
 */
typedef struct
{
/* Parameters. */
int num_map_tasks; /* # of map tasks. */
int num_reduce_tasks; /* # of reduce tasks. */
int chunk_size; /* # of units of data for each map task. */
int num_procs; /* # of processors to run on. */
int num_map_threads; /* # of threads for map tasks. */
int num_reduce_threads; /* # of threads for reduce tasks. */
int num_merge_threads; /* # of threads for merge tasks. */
float key_match_factor; /* # of values likely to be matched
to the same key. */
bool oneOutputQueuePerMapTask; /* One output queue per map task? */
bool oneOutputQueuePerReduceTask; /* One output queue per reduce task? */
int intermediate_task_alloc_len; /* NOTE(review): purpose is not visible
                                  * in this excerpt - confirm. */
/* Callbacks. */
map_t map; /* Map function. */
reduce_t reduce; /* Reduce function. */
combiner_t combiner; /* Combiner function. */
partition_t partition; /* Partition function. */
splitter_t splitter; /* Splitter function. */
locator_t locator; /* Locator function. */
key_cmp_t key_cmp; /* Key comparator function. */
/* Structures. */
map_reduce_args_t * args; /* Args passed in by the user. */
thread_info_t * tinfo; /* Thread information array. */
keyvals_arr_t **intermediate_vals;
/* Array to send to reduce task. */
keyval_arr_t *final_vals; /* Array to send to merge task. */
keyval_arr_t *merge_vals; /* Array to send to user. */
uintptr_t splitter_pos; /* Tracks position in array_splitter(). */
/* Policy for mapping threads to cpus. */
sched_policy *schedPolicies[TASK_TYPE_TOTAL];
taskQ_t *taskQueue; /* Queues of tasks. */
tpool_t *tpool; /* Thread pool. */
} mr_env_t;
mr_env_t 结构体中比较重要的字段有两个:一是 taskQ_t *taskQueue,保存了子任务的信息;另一个是 tpool_t *tpool,保存了所用到的线程的信息。别的字段很多是从 map_reduce_args_t 结构体中拷贝过来的。
/*
 * Description of one task. The four 64-bit words can be accessed either
 * as the raw array v[] or through the named fields of the anonymous struct.
 */
typedef struct {
union {
uint64_t v[4];
struct {
uint64_t id;
uint64_t len; // length of the task
uint64_t data; // the task's data (presumably a pointer stored as an integer - confirm)
uint64_t pad;
};
};
} task_t;
/*
 * One task-queue entry: a task together with a queue_elem_t
 * (presumably the link used to chain the entry into a queue_t - confirm).
 */
typedef struct {
task_t task;
queue_elem_t queue_elem;
} tq_entry_t;
/*
 * Lock bundle used by the task queue (see taskQ_t.locks).
 * NOTE(review): the exact role of each member is not visible in this
 * excerpt; the names suggest a parent lock plus per-thread locks - confirm.
 */
typedef struct {
mr_lock_t parent;
uintptr_t chksum;
mr_lock_t *per_thread;
} tq_lock_t;
/*
 * Task queue container: a set of queues, a parallel set of free queues,
 * their locks, and one random seed per thread.
 */
struct taskQ_t {
int num_queues;
int num_threads;
queue_t **queues;
queue_t **free_queues; /* presumably recycled entries available for reuse - confirm */
tq_lock_t *locks;
/* putting all seeds together may lead to extra coherence traffic among cpus
* if it's a problem we can pad it by l1 line size */
/* per-thread random seed */
unsigned int *seeds;
};
taskQ_t 组成了一个任务队列,任务的信息存储在 task_t 之中。
/*
 * Per-thread argument block of the thread pool. Apart from its own
 * sem_run semaphore, every member is a pointer; the names mirror the
 * members of tpool_t, so they presumably point into the owning pool - confirm.
 */
typedef struct {
sem_t sem_run; /* per-thread semaphore (presumably posted to let the worker run - confirm) */
unsigned int *num_workers_done;
sem_t *sem_all_workers_done;
thread_func *thread_func; /* pointer to the function the worker should execute */
void **thread_func_arg; /* pointer to the argument for thread_func */
void **ret; /* where the worker's return value is stored */
int *num_workers;
int *die; /* presumably a shutdown flag - confirm */
} thread_arg_t;
/*
 * Thread pool: the array of threads, the per-thread argument blocks, and
 * the shared state used to track when the workers have finished.
 */
struct tpool_t {
int num_threads;
int num_workers;
int die;
thread_func thread_func;
sem_t sem_all_workers_done;
unsigned int num_workers_done;
void **args;
pthread_t *threads;// array of thread handles
thread_arg_t *thread_args;
};
线程池其实就是一个线程数组(pthread_t *threads),数组的大小根据 CPU 的个数决定。单个线程的参数和返回结果存在 thread_arg_t 之中。注意 sem_t 是信号量而不是条件变量:从字段名看,每个线程应是通过自己的信号量 sem_t sem_run 来决定何时开始执行;当单个线程执行完时,会通过指针 sem_t *sem_all_workers_done 去增加 tpool_t 中 sem_t sem_all_workers_done 这个总的信号量。
/*
 * Map phase driver: generates the map tasks, records how many there are,
 * caps the map thread count at the task count, and then starts the workers
 * with the task type set to TASK_TYPE_MAP.
 */
static void map (mr_env_t* env)
{
    thread_arg_t worker_arg;
    int task_count = gen_map_tasks (env);

    assert (task_count >= 0);
    env->num_map_tasks = task_count;

    /* Never use more map threads than there are map tasks. */
    if (env->num_map_threads > task_count) {
        env->num_map_threads = task_count;
    }

    /* Start from a zeroed argument block; only the task type is set here. */
    mem_memset (&worker_arg, 0, sizeof worker_arg);
    worker_arg.task_type = TASK_TYPE_MAP;

    start_workers (env, &worker_arg);
}
map、reduce、merge 这三个函数主要都是调用 start_workers 这个函数,区别主要在于传入的任务类型(task_type)不一样。
/*
 * Run one phase across the worker threads and wait for it to finish.
 *
 * env:    runtime environment; env->tinfo is allocated on entry and
 *         released (and reset to NULL) before returning.
 * th_arg: template argument block. The caller sets task_type; this function
 *         fills in env, cpu_id and thread_id and copies the template once
 *         per thread.
 *
 * Thread slot 0 is executed by the calling thread via start_my_work();
 * slots 1..num_threads-1 are handed to the thread pool. When TIMING is
 * defined, the per-thread timing records are summed and the per-thread
 * averages are printed to stderr.
 */
static void
start_workers (mr_env_t* env, thread_arg_t *th_arg)
{
    int             thread_index;
    TASK_TYPE_T     task_type;
    int             num_threads;
    int             cpu;
    intptr_t        ret_val;
    thread_arg_t    **th_arg_array;
    void            **rets;
#ifdef TIMING
    uint64_t        work_time = 0;
    uint64_t        user_time = 0;
    uint64_t        combiner_time = 0;
#endif

    assert (th_arg != NULL);

    task_type = th_arg->task_type;
    num_threads = getNumTaskThreads (env, task_type);

    /* Slot 0 is always run by the caller below (and num_threads is used as
     * a divisor under TIMING), so at least one thread is required. */
    assert (num_threads > 0);

    env->tinfo = (thread_info_t *)mem_calloc (
        num_threads, sizeof (thread_info_t));
    CHECK_ERROR (env->tinfo == NULL);
    th_arg->env = env;

    th_arg_array = (thread_arg_t **)mem_malloc (
        sizeof (thread_arg_t *) * num_threads);
    CHECK_ERROR (th_arg_array == NULL);

    /* Give every thread its own copy of the template, stamped with the
     * cpu chosen by the scheduling policy and the thread's index. */
    for (thread_index = 0; thread_index < num_threads; ++thread_index) {
        cpu = sched_thr_to_cpu (
            env->schedPolicies[task_type],
            thread_index + env->args->proc_offset);
        th_arg->cpu_id = cpu;
        th_arg->thread_id = thread_index;

        th_arg_array[thread_index] = mem_malloc (sizeof (thread_arg_t));
        CHECK_ERROR (th_arg_array[thread_index] == NULL);
        mem_memcpy (th_arg_array[thread_index], th_arg, sizeof (thread_arg_t));
    }

    /* Slots 1.. go to the pool; slot 0 is run on the calling thread. */
    start_thread_pool (
        env->tpool, task_type, &th_arg_array[1], num_threads - 1);

    dprintf("Status: All %d threads have been created\n", num_threads);

    ret_val = (intptr_t)start_my_work (th_arg_array[0]);
#ifdef TIMING
    thread_timing_t *timing = (thread_timing_t *)ret_val;
    work_time += timing->work_time;
    user_time += timing->user_time;
    combiner_time += timing->combiner_time;
    mem_free (timing);
#endif
    mem_free (th_arg_array[0]);

    /* Barrier, wait for all threads to finish. */
    CHECK_ERROR (tpool_wait (env->tpool));
    rets = tpool_get_results (env->tpool);

    /* rets[] holds the pool threads only, hence the index shift by one. */
    for (thread_index = 1; thread_index < num_threads; ++thread_index)
    {
#ifdef TIMING
        ret_val = (intptr_t)rets[thread_index - 1];
        thread_timing_t *timing = (thread_timing_t *)ret_val;
        work_time += timing->work_time;
        user_time += timing->user_time;
        combiner_time += timing->combiner_time;
        mem_free (timing);
#endif
        mem_free (th_arg_array[thread_index]);
    }

    mem_free (th_arg_array);
    mem_free (rets);

#ifdef TIMING
    switch (task_type)
    {
        case TASK_TYPE_MAP:
            fprintf (stderr, "map work time: %" PRIu64 "\n",
                                        work_time / num_threads);
            fprintf (stderr, "map user time: %" PRIu64 "\n",
                                        user_time / num_threads);
            fprintf (stderr, "map combiner time: %" PRIu64 "\n",
                                        combiner_time / num_threads);
            break;

        case TASK_TYPE_REDUCE:
            fprintf (stderr, "reduce work time: %" PRIu64 "\n",
                                        work_time / num_threads);
            fprintf (stderr, "reduce user time: %" PRIu64 "\n",
                                        user_time / num_threads);
            break;

        case TASK_TYPE_MERGE:
            fprintf (stderr, "merge work time: %" PRIu64 "\n",
                                        work_time / num_threads);
            break;

        default:
            break;
    }
#endif

    mem_free(env->tinfo);
    env->tinfo = NULL;   /* do not leave a dangling pointer in the shared env */

    dprintf("Status: All tasks have completed\n");
}
start_workers 分配 CPU 和线程等资源,然后启动线程执行,在线程中分别调用用户定义的
map_t map; /* Map function pointer, must be user defined */
reduce_t reduce; /* If NULL, identity reduce function is used,
* which emits a keyval pair for each val. */
combiner_t combiner; /* If NULL, no combiner would be called. */
splitter_t splitter; /* If NULL, the array splitter is used.*/
locator_t locator; /* If NULL, no locality based optimization is
performed. */
key_cmp_t key_cmp; /* Key comparison function.
Must be user defined.*/
完成计算