Word Count Example
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
//MAIN (DRIVER) CLASS
//The driver class contains the Mapper class, the Reducer class and the main method
public class WordCountNew {
//MAPPER CLASS takes 4 Arguments i.e. Mapper <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
//one holds the constant value 1 emitted for every token; word holds the current token
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
/*We override the map method defined in the parent (Mapper) class. It takes 3 arguments as
inputs: map(KEYIN key, VALUEIN value, Context context).
In the map method we receive one record (a single line), which is stored in the String variable
line. Using StringTokenizer we break the line into individual words, called tokens, with
whitespace as the delimiter. If the line were "Hello There", StringTokenizer would give the two
tokens Hello and There. Finally, using the context object, we emit the Mapper output. So for our
example the output from the Mapper will be (Hello, 1), (There, 1) and so on. The output of the
Mapper is taken as input by the Reducer. */
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//Converting the record (single line) to String and storing it in the String variable line
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
//Running a while loop to get each token (word) one by one from StringTokenizer
while (tokenizer.hasMoreTokens()) {
//Writing the output as (word, one): word is set to the current token and one is the constant 1
word.set(tokenizer.nextToken());
context.write(word, one);
}
}
}
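/*Illustrative trace (an assumed sample input, not produced by the code above): for the two
input lines "Hello There" and "Hello World", the map calls emit (Hello, 1), (There, 1),
(Hello, 1), (World, 1). The framework then shuffles and groups these pairs by key, so the
Reducer receives, e.g., Hello <1,1>. */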
//REDUCER CLASS takes 4 Arguments i.e. Reducer <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
/*We override the reduce method defined in the parent (Reducer) class. It takes 3 arguments as
inputs: reduce(KEYIN key, VALUEIN values, Context context).
In the reduce method we receive a key (a word) and a list of values as input, e.g. Hello <1,1,1,1>.
To find the number of occurrences of the word Hello in the input file, we simply sum all the
values in the list. Hence we run a for loop to iterate over the values one by one, adding each to
the variable sum. Finally we write the output, i.e. key (word) and value (sum), using the context
object. So for the example above the output will be (Hello, 4). */
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
//Adding each value to the variable sum over every iteration
for (IntWritable value : values) {
sum += value.get();
}
//Finally writing the key and the value of sum (the number of times the word occurred in the
//input file) to the output
context.write(key, new IntWritable(sum));
}
}
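/*Continuing the illustrative trace (still assumed sample data): reduce receives Hello <1,1>,
There <1> and World <1>, and writes (Hello, 2), (There, 1) and (World, 1). */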
//main method: the entry point of the application. This is the method that is called as soon as
//the jar is executed
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
//Creating the Job object, passing the conf object and the job name as arguments. The Job
//class allows the user to configure the job, submit it and control its execution.
Job job = Job.getInstance(conf, "WordCount");
job.setJarByClass(WordCountNew.class);
//Key and value types of the job output: the word as Text and its count as IntWritable
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//Wiring in the Mapper and Reducer classes defined above
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//TextInputFormat feeds the Mapper one line at a time; TextOutputFormat writes key<TAB>value lines
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
//Adding the path that will act as input for the MR job; args[0] means it will use the first
//argument given on the terminal as the input path
FileInputFormat.addInputPath(job, new Path(args[0]));
//Setting the path to the directory where the MR job will dump its output; args[1] means it will
//use the second argument given on the terminal as the output path
FileOutputFormat.setOutputPath(job,new Path(args[1]));
//Submitting the job to the cluster and waiting for its completion
job.waitForCompletion(true);
}
}
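As a quick usage sketch (the jar name wordcount.jar and the HDFS paths /input and /output are
assumptions for illustration, not part of the code above), a typical run looks like:

hadoop jar wordcount.jar WordCountNew /input /output
hdfs dfs -cat /output/part-r-00000

For an input file containing the lines "Hello There" and "Hello World", the single reducer sorts
the keys, and the final output (key and count separated by a tab, as written by TextOutputFormat)
would be:

Hello	2
There	1
World	1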