环境
- JDK 1.8
- Intellij Idea 2018.1
- Hadoop 2.6.0 (本地未安装Hadoop)
- maven 3.5.4
创建word count项目
- 在idea 中新建maven java项目(配置maven jdk略)

配置pom依赖
- pom.xml文件
<!-- Build properties: source files are treated as UTF-8, and the compiler
     source/target levels are both set to Java 1.8. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<!-- Project dependencies. Both Hadoop artifacts are pinned to the same version (2.6.0). -->
<dependencies>
<!-- JUnit 4.11, test scope only (not on the runtime classpath). -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- Hadoop client library; presumably supplies the org.apache.hadoop.* classes
     imported by the mapper, reducer and driver. TODO confirm against the resolved dependency tree. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<!-- MapReduce job client, declared explicitly at the same version as hadoop-client.
     NOTE(review): may already be pulled in transitively by hadoop-client; verify before removing. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>2.6.0</version>
</dependency>
<!-- Apache Commons CLI 1.2. NOTE(review): not referenced directly by the classes shown here;
     presumably required by Hadoop's option parsing at runtime. Confirm. -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<!-- log4j 1.2.17. NOTE(review): presumably the logging backend used by Hadoop at runtime; confirm. -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
</dependencies>
-
创建mapper类
package com.lens.task;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.StringTokenizer;
/**
 * Mapper of the word-count job: splits each input value into whitespace-delimited
 * tokens and emits a {@code (token, 1)} pair for every token.
 *
 * @author lens
 * @create 2020-02-25 10:24
 */
public class VoteCountMapper extends Mapper<Object, Text, Text, IntWritable> {

    /** Count emitted for every token; a single shared instance is written each time. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key holder, overwritten with each token before it is written. */
    private final Text token = new Text();

    /**
     * Tokenizes one input record and writes one output pair per token.
     *
     * @param key     input key; not used by this mapper
     * @param value   input text to tokenize (default {@link StringTokenizer} delimiters)
     * @param context sink that receives the {@code (token, 1)} pairs
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        for (StringTokenizer tokens = new StringTokenizer(value.toString()); tokens.hasMoreTokens(); ) {
            token.set(tokens.nextToken());
            context.write(token, ONE);
        }
    }
}
-
创建reducer类
package com.lens.task;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
* @author lens
* @create 2020-02-25 10:24
*/
public class VoteCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int count = 0;
for (IntWritable value : values) {
count += value.get();
}
result.set(count);
context.write(key, result);
}
}
-
创建VoteCount驱动类
package com.lens.task;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* @author lens
* @create 2020-02-25 10:22
*/
public class VoteCount extends Configured implements Tool {
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(),new VoteCount(),args);
System.exit(res);
}
public int run(String[] args) throws Exception {
if (args.length !=2){
System.out.println("Incorrect input, expected: [input] [output]");
System.exit(-1);
}
Configuration conf = this.getConf();
Job job = new Job(conf, "word count");
job.setJarByClass(VoteCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(VoteCountMapper.class);
job.setReducerClass(VoteCountReducer.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputValueClass(TextOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.submit();
return job.waitForCompletion(true) ? 0 : 1;
}
}
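注意:此处的FileInputFormat、FileOutputFormat(以及TextInputFormat、TextOutputFormat)需要导入的是org.apache.hadoop.mapreduce.lib.input和org.apache.hadoop.mapreduce.lib.output包下的类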
创建word count的文件输入目录input。程序会对input中文件的单词进行计数,并将计数结果输出到output目录。
首先需要配置输入路径,这里在项目下(src同级目录)新建一个文件夹input,并添加一个或多个文本文件到input中(已上传)作为示例。

需要注意:File->Project Structure,在弹出来的对话框中选择Modules项,这里将input文件夹标记为Excluded。

配置运行参数
此处需要配置程序运行时的Main class和VoteCount需要的输入input 输出output路径。
在Intellij菜单栏中选择Run->Edit Configurations,在弹出来的对话框中点击 +,新建一个Application配置。配置Main class为VoteCount(可以点击右边的...选择),Program arguments为input/ output/,即输入路径为创建的input文件夹,输出路径为output(两个参数都必须填写;output文件夹无需事先创建,并且运行前不能已存在,否则作业会因输出目录已存在而失败)

运行
配置完成后,点击菜单栏Run->Run 'VoteCount'即开始运行此MapReduce程序,待程序运行结束后,左上方会出现文件夹output,其中的part-r-00000就是运行的结果!

输入文件

运行结果

|