Practical 2-3
package in.project.mapreduce;
import java.io.IOException;
import java.util.StringTokenizer;
/* All org.apache.hadoop packages can be imported using the jars present in the lib directory of this Java project. */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/* The Patent program finds the number of sub-patents associated with each patent id in the provided input file.
We write MapReduce code to achieve this, where the mapper builds key-value pairs from the input file and the
reducer aggregates those key-value pairs. */
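/* For illustration (the sample line below is hypothetical, not taken from the practical's dataset): given the input
line "1 13 1 14 2 21", where tokens alternate between a patent id and a sub-patent id, the mapper emits the pairs
(1,13), (1,14) and (2,21); the reducer then receives (1,[13,14]) and (2,[21]) and outputs the counts 1 -> 2 and
2 -> 1. */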
public class Patent {
//Mapper
/* The Map class is static and extends Mapper with the four Hadoop generic types LongWritable, Text, Text,
Text. */
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
/* This method takes the input as the Text data type and tokenizes it by taking whitespace as the delimiter. The
resulting key-value pairs are passed to the reducer. @method_arguments key, value, context
@return void */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//Converting the record (single line) to String and storing it in a String variable line
String line = value.toString();
//StringTokenizer breaks the record (line) on the whitespace delimiter
StringTokenizer tokenizer = new StringTokenizer(line, " ");
//Iterating through all the tokens and forming the key-value pairs
while (tokenizer.hasMoreTokens()) {
/* The first token goes into patentId, the second into subPatentId, the third into patentId, the fourth into
subPatentId, and so on; the tokens are consumed in pairs, which assumes every line holds an even number of
tokens. */
String patentId = tokenizer.nextToken();
String subPatentId = tokenizer.nextToken();
//Emitting the (patent id, sub-patent id) pair to the reducer
context.write(new Text(patentId), new Text(subPatentId));
}
}
}
//Reducer
/* The Reduce class is static and extends Reducer with the four Hadoop generic types Text, Text, Text,
IntWritable. */
public static class Reduce extends Reducer<Text, Text, Text, IntWritable> {
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
/* Iterates through all the values available with a key, counts them, and gives the final result as the key and
the number of its values */
int sum = 0;
for (Text value : values) {
sum++;
}
//Emitting the patent id along with its sub-patent count
context.write(key, new IntWritable(sum));
}
}
//Driver
/* This method is used for setting all the configuration properties. It acts as the driver for the MapReduce code.
@method_arguments args
@return void
@throws Exception */
public static void main(String[] args) throws Exception {
//Reading the default configuration of the cluster from the configuration XML files
Configuration conf = new Configuration();
//Initializing the job with the default configuration of the cluster
Job job = new Job(conf, "patent");
//Assigning the driver class name
job.setJarByClass(Patent.class);
//Assigning the Mapper and Reducer classes to the job
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//Explicitly setting the output key/value types from the mapper, since they are not the same as the reducer's
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
//Defining the output key class for the final output, i.e. from the reducer
job.setOutputKeyClass(Text.class);
//Defining the output value class for the final output, i.e. from the reducer
job.setOutputValueClass(IntWritable.class);
//Defining the input format class, which is responsible for parsing the dataset into key-value pairs
job.setInputFormatClass(TextInputFormat.class);
/* Defining the output format class, which is responsible for writing the final key-value output from the MR
framework to a text file on the disk */
job.setOutputFormatClass(TextOutputFormat.class);
//Configuring the input/output path from the filesystem into the job
FileInputFormat.addInputPath(job, new Path(args[0]));
Path outputPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outputPath);
//Deleting the output path automatically from HDFS so that we don't have to delete it explicitly
outputPath.getFileSystem(conf).delete(outputPath, true);
//Submitting the job to the cluster and waiting for it to complete
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
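With the class packaged into a jar, the job can be submitted with the hadoop jar command. A minimal sketch,
assuming the jar is named patent.jar and using placeholder HDFS paths:
hadoop jar patent.jar in.project.mapreduce.Patent /user/hadoop/patent/input /user/hadoop/patent/output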
Output:
Patent    Number of Associated Sub-patents
1         13
2         10
3         4
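The final counts can be inspected directly on HDFS. A minimal sketch, assuming the placeholder output path used
above (the reduce output file is conventionally named part-r-00000):
hdfs dfs -cat /user/hadoop/patent/output/part-r-00000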