WordCount version one

package org.myorg;

import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger;

public class WordCount extends Configured implements Tool { private static final Logger LOG = Logger.getLogger(WordCount.class); public static void main(String[] args) throws Exception { int res = ToolRunner.run(new WordCount(), args); System.exit(res); }

public int run(String[] args) throws Exception { Job job = Job.getInstance(getConf(), "wordcount"); job.setJarByClass(this.getClass()); // Use TextInputFormat by default, or use job.setInputFormatClass FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); return job.waitForCompletion(true) ? 0 : 1; }

public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); private long numRecords = 0;

`public void map(LongWritable offset, Text lineText, Context context)`
    `throws IOException, InterruptedException {`
  `String line = lineText.toString();`
  `StringTokenizer tokenizer = new StringTokenizer(line);`
  `while (tokenizer.hasMoreTokens()) {`
    `word.set(tokenizer.nextToken());`
    `context.write(word, one);`
    `context.getCounter(Counters.INPUT_WORDS).increment(1);`
  `}`
`}`
`private enum Counters {INPUT_WORDS}`

} public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> { @Override public void reduce(Text word, Iterable<IntWritable> counts, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable count : counts) { sum += count.get(); } context.write(word, new IntWritable(sum)); } } }

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

WordCount version one

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Clone this wiki locally