Source Code for Wordcount

The document contains Java source code for a Hadoop MapReduce program that performs word counting. It includes a driver class, a mapper class that tokenizes input text, and a reducer class that sums the occurrences of each word. The program is structured to take input and output directories as arguments and utilizes Hadoop's framework for processing large datasets.


//Source Code for Wordcount:

//Driver Code:

package naiduwc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "WordCount");
        job.setJarByClass(WordCount.class);

        // specify the mapper
        job.setMapperClass(WordCountMapper.class);

        // specify the reducer
        job.setReducerClass(WordCountReducer.class);

        // specify the output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // specify input and output DIRECTORIES (not files);
        // args[0] is the input path and args[1] the output path
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        if (!job.waitForCompletion(true))
            return;
    }
}
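The driver reads the input and output paths from the command line. A typical invocation, assuming the three classes are packaged into a jar named wordcount.jar (the jar name and HDFS paths here are illustrative, not part of the original listing), would be:

hadoop jar wordcount.jar naiduwc.WordCount /user/hadoop/input /user/hadoop/output

Note that the output directory must not already exist; Hadoop refuses to overwrite it. A common optimization, also not part of the original listing, is to register the reducer as a combiner with job.setCombinerClass(WordCountReducer.class), so that partial sums are computed on the map side before the shuffle.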

//Mapper Code:

package naiduwc;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        Text word = new Text();
        // split the input line into whitespace-delimited tokens
        String line = value.toString();
        StringTokenizer s = new StringTokenizer(line);
        while (s.hasMoreTokens()) {
            // emit each token with a count of 1
            word.set(s.nextToken());
            context.write(word, new IntWritable(1));
        }
    }
}
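To make the mapper's behavior concrete: for an input line such as "big data big ideas", it emits the pairs (big, 1), (data, 1), (big, 1), (ideas, 1). StringTokenizer splits on whitespace only, so punctuation and letter case are preserved; "Big" and "big" are counted as different words.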

//Reducer Code:

package naiduwc;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // sum all the 1s emitted for this word
        int sum = 0;
        for (IntWritable val : values) {
            sum = sum + val.get();
        }
        // write the word and its total count; this must happen
        // after the loop, once per key
        context.write(key, new IntWritable(sum));
    }
}
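Between the map and reduce phases, the framework sorts the mapper output and groups it by key, so the reducer for the example above receives (big, [1, 1]), (data, [1]), and (ideas, [1]), and writes (big, 2), (data, 1), and (ideas, 1) to the output directory.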
