// WordCount Version Two

package org.myorg;

import java.io.IOException;
import java.util.Locale;
import java.util.StringTokenizer;
// Import the Configuration class for WordCount version 2
import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/**
 * WordCount, version two: a MapReduce job that counts how often each
 * whitespace-delimited token occurs in the input.
 *
 * <p>Compared with version one, the mapper reads the boolean configuration
 * property {@code wordcount.case.sensitive}. When it is {@code false} (the
 * default) every input line is lower-cased before it is tokenized, so
 * {@code "Hadoop"} and {@code "hadoop"} are counted as the same word.
 *
 * <p>The first command-line argument is the input path and the second is the
 * output path; both are handed to the job in {@link #run(String[])}.
 */
public class WordCount extends Configured implements Tool {

  private static final Logger LOG = Logger.getLogger(WordCount.class);

  /**
   * Command-line entry point: runs this tool through {@link ToolRunner} and
   * exits the JVM with the status returned by {@link #run(String[])}.
   *
   * @param args command-line arguments, passed to {@link ToolRunner#run}
   * @throws Exception if the tool fails
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new WordCount(), args);
    System.exit(res);
  }

  /**
   * Configures and submits the word-count job, then waits for it to finish.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} is the
   *     output path; any further arguments are ignored
   * @return {@code 0} if the job succeeded, {@code 1} if it failed, or
   *     {@code 2} if fewer than two arguments were supplied
   * @throws Exception if the job cannot be created or submitted
   */
  @Override
  public int run(String[] args) throws Exception {
    // Fail with a readable usage message instead of an
    // ArrayIndexOutOfBoundsException when a path is missing.
    if (args == null || args.length < 2) {
      System.err.println("Usage: wordcount [generic options] <input path> <output path>");
      ToolRunner.printGenericCommandUsage(System.err);
      return 2;
    }

    Job job = Job.getInstance(getConf(), "wordcount");

    job.setJarByClass(this.getClass());
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }

  /**
   * Mapper that emits {@code (token, 1)} for every whitespace-delimited token
   * of each input line and increments the {@code INPUT_WORDS} counter once per
   * emitted token.
   *
   * <p>Note: inside {@code WordCount} this class name shadows
   * {@code java.util.Map}; it is kept because the name is part of the public
   * interface.
   */
  public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);

    // Reused for every emitted key to avoid allocating one Text per token.
    private final Text word = new Text();

    // Set from the job configuration in setup(); case-insensitive by default.
    private boolean caseSensitive = false;

    /**
     * Reads the {@code wordcount.case.sensitive} property from the job
     * configuration; it defaults to {@code false} when unset.
     *
     * @param context the task context supplying the job configuration
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      Configuration config = context.getConfiguration();
      this.caseSensitive = config.getBoolean("wordcount.case.sensitive", false);
    }

    /**
     * Tokenizes one line of input and writes {@code (token, 1)} per token.
     *
     * @param offset key supplied by the input format (unused here)
     * @param lineText the line of text to tokenize
     * @param context sink for the emitted pairs and the word counter
     */
    @Override
    public void map(LongWritable offset, Text lineText, Context context)
        throws IOException, InterruptedException {

      String line = lineText.toString();

      if (!caseSensitive) {
        // Locale.ROOT keeps case folding identical on every node regardless
        // of the JVM's default locale (e.g. Turkish dotless-i rules).
        line = line.toLowerCase(Locale.ROOT);
      }

      StringTokenizer tokenizer = new StringTokenizer(line);
      while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        context.write(word, ONE);
        context.getCounter(Counters.INPUT_WORDS).increment(1);
      }
    }

    private enum Counters {INPUT_WORDS}
  }

  /**
   * Reducer that sums the counts received for a word and writes
   * {@code (word, total)}.
   */
  public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output value; set to the total before each write.
    private final IntWritable total = new IntWritable();

    /**
     * Adds up all counts for {@code word} and emits the total.
     *
     * @param word the token being reduced
     * @param counts the partial counts for that token
     * @param context sink for the {@code (word, total)} pair
     */
    @Override
    public void reduce(Text word, Iterable<IntWritable> counts, Context context)
        throws IOException, InterruptedException {
      // NOTE(review): the sum is an int because the declared output value type
      // is IntWritable; totals above Integer.MAX_VALUE would wrap around.
      int sum = 0;
      for (IntWritable count : counts) {
        sum += count.get();
      }
      total.set(sum);
      context.write(word, total);
    }
  }
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

WordCount Version Two

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Clone this wiki locally