MapReduce Programs
1. Implementation of Word Count with Hadoop MapReduce
Mapper Code:
package wordcount;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
// Mapper: emits (word, 1) for every whitespace-separated token in the input line
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
for (String word : value.toString().split("\\s+")) context.write(new Text(word), new IntWritable(1));
}
}
Reducer Code:
package wordcount;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable value : values) sum += value.get();
context.write(key, new IntWritable(sum));
}
}
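As a quick sanity check of the mapper and reducer logic: for an input line such as "hadoop map reduce hadoop", the mapper emits (hadoop,1), (map,1), (reduce,1), (hadoop,1); after the shuffle, the reducer receives hadoop -> [1,1], map -> [1], reduce -> [1] and writes hadoop 2, map 1, reduce 1.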
Driver Code:
package wordcount;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
public class WordCountDriver {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCountDriver.class);
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Delete the output directory if it already exists so the job can be rerun
FileSystem fs = FileSystem.get(conf);
fs.delete(new Path(args[1]), true);
job.waitForCompletion(true);
}
}
1. Now create a jar file: right-click the project -> Export -> select the export destination as JAR file -> name the jar file (WordCount.jar) -> click Next -> finally click Finish.
2. Copy this jar file into the workspace directory of Cloudera.
3. Copy the input file from the local filesystem to the HDFS filesystem.
4. Syntax: hdfs dfs -put <source-path> <destination-path>
5. Example: hdfs dfs -put wordcount.txt /user/cloudera/Input/hfs
Command:
hadoop jar <jar-filename> <package-name>.<class-name> <input-file-path> <output-file-path>
Example:
hadoop jar WordCount.jar wordcount.WordCountDriver /user/cloudera/Input/hfs/wordcount.txt /user/cloudera/Input/output
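Once the job finishes, the word counts can be inspected directly in HDFS. A typical check (assuming the default reducer output file name part-r-00000) is:
hdfs dfs -cat /user/cloudera/Input/output/part-r-00000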
2. Implementation of Matrix Multiplication with Hadoop MapReduce
Driver Code:
package MatrixMultiply;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class MatrixMultiply {
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: MatrixMultiply <in_dir> <out_dir>");
System.exit(2);
}
Configuration conf = new Configuration();
// Matrix dimensions: M is m x n and N is n x p. These values should match the
// actual input matrices (for the 2 x 2 sample files below they would be 2, 2, 2).
conf.set("m", "1000");
conf.set("n", "100");
conf.set("p", "1000");
@SuppressWarnings("deprecation")
Job job = new Job(conf, "MatrixMultiply");
job.setJarByClass(MatrixMultiply.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
Mapper Code:
package MatrixMultiply;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class Map extends Mapper<LongWritable, Text, Text, Text>
{
@Override
public void map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException
{
Configuration conf = context.getConfiguration();
int m = Integer.parseInt(conf.get("m"));
int p = Integer.parseInt(conf.get("p"));
String line = value.toString();
// (M, i, j, Mij);
String[] indicesAndValue = line.split(",");
Text outputKey = new Text();
Text outputValue = new Text();
if (indicesAndValue[0].equals("M"))
{
for (int k = 0; k < p; k++)
{
outputKey.set(indicesAndValue[1] + "," + k);
// outputKey.set(i,k);
outputValue.set(indicesAndValue[0] + "," + indicesAndValue[2] + "," +
indicesAndValue[3]);
// outputValue.set(M,j,Mij);
context.write(outputKey, outputValue);
}
} else {
// (N, j, k, Njk);
for (int i = 0; i < m; i++)
{
outputKey.set(i + "," + indicesAndValue[2]); outputValue.set("N," +
indicesAndValue[1] + "," + indicesAndValue[3]); context.write(outputKey, outputValue);
}
}
}
}
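To make the mapper's replication strategy concrete: with p = 2, the record M,0,1,2 (element M[0][1] = 2) is emitted once for every column k of the result, as (0,0) -> M,1,2 and (0,1) -> M,1,2; likewise, with m = 2, the record N,1,0,7 (element N[1][0] = 7) is emitted once for every row i, as (0,0) -> N,1,7 and (1,0) -> N,1,7. Each result cell (i,k) therefore receives every M and N element it needs for the dot product.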
Reducer Code:
package MatrixMultiply;
import java.io.IOException;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class Reduce extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String[] value;
//key=(i,k),
//Values = [(M/N,j,V/W),..]
HashMap<Integer, Float> hashA = new HashMap<Integer, Float>();
HashMap<Integer, Float> hashB = new HashMap<Integer, Float>();
for (Text val : values) {
value = val.toString().split(",");
if (value[0].equals("M")) {
hashA.put(Integer.parseInt(value[1]), Float.parseFloat(value[2]));
}
else {
hashB.put(Integer.parseInt(value[1]), Float.parseFloat(value[2]));
}
}
int n = Integer.parseInt(context.getConfiguration().get("n"));
float result = 0.0f;
float m_ij;
float n_jk;
for (int j = 0; j < n; j++) {
m_ij = hashA.containsKey(j) ? hashA.get(j) : 0.0f;
n_jk = hashB.containsKey(j) ? hashB.get(j) : 0.0f;
result += m_ij * n_jk;
}
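// A null key makes TextOutputFormat write only the value, so each output line is "i,k,result"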
if (result != 0.0f) {
context.write(null, new Text(key.toString() + "," + Float.toString(result)));
}
}
}
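As a worked example of the reduce step: for key (0,0) the grouped values from the sample matrices below are M,0,1 and M,1,2 (row 0 of M) together with N,0,5 and N,1,7 (column 0 of N), so the reducer computes 1*5 + 2*7 = 19 and writes the line 0,0,19.0.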
Input Files (each line is of the form matrixName,rowIndex,columnIndex,value):
M.txt
M,0,0,1
M,0,1,2
M,1,0,3
M,1,1,4
N.txt
N,0,0,5
N,0,1,6
N,1,0,7
N,1,1,8
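For these sample matrices (M = [[1,2],[3,4]], N = [[5,6],[7,8]]), the job's output file should contain one line per cell of the product M x N:
0,0,19.0
0,1,22.0
1,0,43.0
1,1,50.0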
3. Implementation of weather mining on a weather data set using MapReduce
Mapper Code:
package Maxmin;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
//Converting the record (single line) to String and storing it in a String variable line
String line = value.toString();
//Processing only non-empty lines
if (!(line.length() == 0))
{
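Only the fragment above survives of the mapper body. A minimal sketch of what the rest of the map() method might look like is given below; it assumes each input record carries a date and a daily temperature as whitespace-separated fields (the actual field positions depend on the weather data set used), and the class name, field indices, and hot/cold thresholds are illustrative assumptions, not taken from the original code.
public static class MaxTemperatureMapper extends Mapper<LongWritable, Text, Text, Text> {
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//Converting the record (single line) to String and storing it in a String variable line
String line = value.toString();
if (!(line.length() == 0)) {
//Assumed record layout: the first field is the date, the second is the temperature
String[] fields = line.split("\\s+");
String date = fields[0];
float temperature = Float.parseFloat(fields[1]);
//Tag the day as hot or cold using assumed thresholds of 30 and 15 degrees
if (temperature > 30.0f) {
context.write(new Text("Hot Day " + date), new Text(String.valueOf(temperature)));
} else if (temperature < 15.0f) {
context.write(new Text("Cold Day " + date), new Text(String.valueOf(temperature)));
}
}
}
}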
Reducer Code:
/**
 * The MaxTemperatureReducer class is static and extends the Reducer abstract class
 * with four Hadoop generic types: Text, Text, Text, Text.
 */
/**
 * @method reduce
 * This method takes the key and the list of values from the mapper as input,
 * aggregates them by key, and writes the final output to the context.
 */
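The reducer body itself is missing from the listing; only the comments above remain. A minimal sketch consistent with those comments (the original implementation may differ) is:
public static class MaxTemperatureReducer extends Reducer<Text, Text, Text, Text> {
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
//Write each (day, temperature) pair received from the mapper to the final output
for (Text value : values) {
context.write(key, value);
}
}
}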
Driver Code:
/**
 * @method main
 * This method is used for setting all the configuration properties.
 * It acts as the driver for the MapReduce job.
 */
//reads the default configuration of cluster from the configuration xml files
Configuration conf = new Configuration();
//Initializing the job with the default configuration of the cluster
Job job = new Job(conf, "weather example");
//Setting the mapper and reducer classes (named as in the comments and sketches above)
job.setMapperClass(MaxTemperatureMapper.class);
job.setReducerClass(MaxTemperatureReducer.class);
//Defining the output key and value classes of the job (both Text, matching the reducer)
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//Defining input Format class which is responsible to parse the dataset into a key value pair
job.setInputFormatClass(TextInputFormat.class);
//Defining output Format class which is responsible to parse the dataset into a key value pair
job.setOutputFormatClass(TextOutputFormat.class);
//Configuring the input path from the filesystem into the job
FileInputFormat.addInputPath(job, new Path(args[0]));
//Configuring the output path from the filesystem into the job
Path OutputPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, OutputPath);
//Deleting the output path automatically from hdfs so that we don't have to delete it explicitly
OutputPath.getFileSystem(conf).delete(OutputPath, true);
//Submitting the job and exiting only after it has completed
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
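The job is packaged and run in the same way as the word count example; the jar name, driver class name, and HDFS paths below are placeholders to be adapted to your own project:
hadoop jar MaxMin.jar Maxmin.<driver-class-name> <weather-input-path> <output-path>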