import java.io.IOException;
import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Mapper for a word-count job: lower-cases each input value, splits it into
 * tokens, and emits one {@code (token, 1)} pair per token.
 *
 * <p>Tokens are the maximal runs of characters that are not listed in
 * {@link #DELIMITERS}; note that digits are delimiters, so numbers are
 * dropped and words containing digits are split around them.
 */
public class WordCountMapper extends MapReduceBase implements Mapper {

    /**
     * Characters that separate tokens: space, tab, common punctuation and
     * the decimal digits. Any character not listed here is part of a token.
     */
    private static final String DELIMITERS = " \t.!?:()[],'&-;|0123456789";

    /** The count emitted with every token; it is never modified. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Output key buffer, overwritten for every token. Kept per instance
     * (not static) so that separate mapper instances living in the same JVM
     * do not share mutable state.
     */
    private final Text word = new Text();

    /**
     * Tokenizes one input record and collects {@code (token, 1)} for each token.
     *
     * @param key      the input key; not used by this mapper
     * @param values   the input record; its {@code toString()} form is tokenized
     * @param output   receives one {@code (Text, IntWritable)} pair per token
     * @param reporter not used by this mapper
     * @throws IOException if {@code output.collect} fails
     */
    public void map(WritableComparable key, Writable values,
                    OutputCollector output, Reporter reporter) throws IOException {
        // Lower-case with a fixed locale: the no-arg toLowerCase() follows the
        // JVM default locale, so the same input could yield different keys on
        // differently configured nodes (e.g. "I" -> dotless i under Turkish).
        String line = values.toString().toLowerCase(Locale.ENGLISH);
        StringTokenizer tokens = new StringTokenizer(line, DELIMITERS);
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            output.collect(word, ONE);
        }
    }

}
