MapReduce:分布式计算框架
通常情况下,一个 MR 作业由 2 个部分构成:MapTask 和 ReduceTask(ReduceTask 可以没有)。
编程规范主要有三个部分:Mapper、Reducer 和 Driver(驱动类,即下面代码中的 main 方法)。
与其说是编程规范,不如说是编程模板,这样更容易理解。
/**
 * Word-count MapReduce job.
 *
 * <p>The mapper splits every input record on commas and emits {@code (token, 1)};
 * the reducer (also registered as the combiner) sums the counts per token.
 * {@link #main(String[])} wires the job together and runs it.
 */
public class WordCount {

    /**
     * Emits {@code (token, 1)} for every comma-separated token of each input value.
     */
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Writable instances are reused across records instead of being allocated
        // per call/per token; this mirrors the WordCount example in the official
        // Hadoop MapReduce tutorial.
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        /**
         * Splits one input record on {@code ","} and writes each token with a count of one.
         *
         * @param key     key of the input record (not used)
         * @param value   text of the input record
         * @param context sink for the emitted {@code (token, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            for (String token : value.toString().split(",")) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Sums all counts received for a key and emits {@code (key, total)}.
     *
     * <p>Because it only adds values up, the same class is also used as the combiner.
     */
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused output holder so that no new IntWritable is allocated per key.
        private final IntWritable total = new IntWritable();

        /**
         * Adds up every value for {@code key} and writes the sum.
         *
         * @param key     the token being counted
         * @param values  partial counts for that token
         * @param context sink for the emitted {@code (key, sum)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            total.set(count);
            context.write(key, total);
        }
    }

    /**
     * Configures and runs the word-count job, then exits the JVM with status 0 on
     * success or 1 on failure.
     *
     * @param args command-line arguments (not used; paths are hard-coded below)
     * @throws IOException            if the output path cannot be cleaned or the job cannot be submitted
     * @throws InterruptedException   if the thread is interrupted while waiting for the job
     * @throws ClassNotFoundException if a class required by the job cannot be found
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration configuration = new Configuration();

        // The input may be a directory, in which case every file under it is counted.
        String sourcePath = "data/wc.data";
        String distPath = "downloadOut/wc-out.data";

        // Remove any output left over from a previous run before the job starts.
        FileUtil.deleteIfExist(configuration, distPath);

        Job job = Job.getInstance(configuration, "word count");
        job.setJarByClass(WordCount.class);

        job.setMapperClass(WordCountMapper.class);
        // The reducer doubles as the combiner to pre-aggregate counts on the map side.
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(sourcePath));
        FileOutputFormat.setOutputPath(job, new Path(distPath));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
/**
 * Recursively deletes {@code distPath} if it exists on the file system selected
 * by {@code configuration}; does nothing when the path is absent.
 *
 * @param configuration Hadoop configuration used to obtain the file system
 * @param distPath      path of the file or directory to remove
 * @throws IOException if the file system cannot be opened or the existence check/delete fails
 */
public static void deleteIfExist(Configuration configuration, String distPath) throws IOException {
    Path target = new Path(distPath);
    // FileSystem.get() returns a cached instance shared by the whole JVM, so closing
    // it here could break other code holding the same instance. newInstance() gives
    // a private instance that is safe to close, and try-with-resources guarantees
    // the close even if exists()/delete() throws.
    try (FileSystem fileSystem = FileSystem.newInstance(configuration)) {
        if (fileSystem.exists(target)) {
            fileSystem.delete(target, true);
        }
    }
}
至此,MapReduce 实践-单词统计 就结束了,如有疑问,欢迎在评论区留言。