Java Custom Writables
Defining value class “name”
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
// Our value class must implement Writable and override two methods:
// 1) readFields()  2) write()
public class name implements Writable {
	private Text first;
	private Text middle;
	private Text last;
	public name() {
		this.first = new Text();
		this.middle = new Text();
		this.last = new Text();
	}
	// Overloaded constructors for convenience
	public name(Text f, Text m, Text l) {
		this.first = f;
		this.middle = m;
		this.last = l;
	}
	public name(String f, String m, String l) {
		this.first = new Text(f);
		this.middle = new Text(m);
		this.last = new Text(l);
	}
	// Deserialize the fields, in the same order they were written
	@Override
	public void readFields(DataInput in) throws IOException {
		first.readFields(in);
		middle.readFields(in);
		last.readFields(in);
	}
	// Serialize each field to the output stream
	@Override
	public void write(DataOutput out) throws IOException {
		first.write(out);
		middle.write(out);
		last.write(out);
	}
	public Text getName() {
		return new Text(first.toString() + " " + middle.toString() + " " + last.toString());
	}
}
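Not shown on the slides, but a quick way to sanity-check the Writable contract is to round-trip a name through plain Java streams; a minimal sketch (the harness class and sample values are illustrative, not part of the original code):
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
// Hypothetical test harness, not part of the original slides
public class NameRoundTrip {
	public static void main(String[] args) throws Exception {
		name original = new name("JOHN", "Q", "PUBLIC");
		// write() serializes the three Text fields to a byte stream
		ByteArrayOutputStream bytes = new ByteArrayOutputStream();
		original.write(new DataOutputStream(bytes));
		// readFields() must consume them in exactly the same order
		name copy = new name();
		copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
		System.out.println(copy.getName()); // prints: JOHN Q PUBLIC
	}
}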
Defining key class “patent”
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
// A key class must implement the WritableComparable interface,
// since keys are compared and sorted during the shuffle.
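The body of patent is not shown on the slide; below is a minimal sketch, assuming the key wraps the patent number and country that the mapper passes to new patent(pat_no, country). Field names and the compareTo/hashCode details are assumptions.
// Sketch of the "patent" key class (body not shown on the original slide)
public class patent implements WritableComparable<patent> {
	private LongWritable pat_no;
	private Text country;
	public patent() {
		this.pat_no = new LongWritable();
		this.country = new Text();
	}
	public patent(long pat_no, String country) {
		this.pat_no = new LongWritable(pat_no);
		this.country = new Text(country);
	}
	@Override
	public void readFields(DataInput in) throws IOException {
		pat_no.readFields(in);
		country.readFields(in);
	}
	@Override
	public void write(DataOutput out) throws IOException {
		pat_no.write(out);
		country.write(out);
	}
	// Keys are sorted by patent number during the shuffle
	@Override
	public int compareTo(patent other) {
		return pat_no.compareTo(other.pat_no);
	}
	// hashCode() is used by the default HashPartitioner
	@Override
	public int hashCode() {
		return Long.hashCode(pat_no.get()) * 163 + country.hashCode();
	}
	@Override
	public boolean equals(Object o) {
		if (!(o instanceof patent)) return false;
		patent other = (patent) o;
		return pat_no.equals(other.pat_no) && country.equals(other.country);
	}
	@Override
	public String toString() {
		return pat_no.toString() + "\t" + country.toString();
	}
}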
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
// Counters for records with missing fields
enum missing {
	COUNTRY,
	FIRST,
	MIDDLE,
	LAST
}
// The "Total" counters keep track of written and skipped records
enum Total {
	COUNT,
	WRITTEN,
	SKIPPED
}
// Our custom key and value classes appear as type parameters to Mapper
public class map_class extends Mapper<LongWritable, Text, patent, name> {
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		StringTokenizer tokens = new StringTokenizer(line, ",");
		long pat_no = 0;
		String last = " ", first = " ", middle = " ", country = " ";
		String token = null;
		token = tokens.nextToken();
		// A first token of exactly 8 characters (presumably the header row) is skipped
		if (token.length() != 8) {
			pat_no = Long.parseLong(token);
			// Extract the fields for our custom key and value classes,
			// stripping the surrounding quotes with substring()
			token = tokens.nextToken();
			if (token.length() > 1)
				last = token.substring(1, token.length() - 1);
			else
				context.getCounter(missing.LAST).increment(1);
			token = tokens.nextToken();
			if (token.length() > 1)
				first = token.substring(1, token.length() - 1);
			else
				context.getCounter(missing.FIRST).increment(1);
			token = tokens.nextToken();
			if (token.length() > 1)
				middle = token.substring(1, token.length() - 1);
			else
				context.getCounter(missing.MIDDLE).increment(1);
			// Advance five tokens; the last one read is the country field
			for (int i = 0; i < 5; i++) {
				token = tokens.nextToken();
			}
			if (token.length() > 1)
				country = token.substring(1, token.length() - 1);
			else
				context.getCounter(missing.COUNTRY).increment(1);
			patent p = new patent(pat_no, country);
			name n = new name(first, middle, last);
			context.write(p, n);
			context.getCounter(Total.WRITTEN).increment(1);
		} else {
			context.getCounter(Total.SKIPPED).increment(1);
		}
		context.getCounter(Total.COUNT).increment(1);
	}
}
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
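The slide shows only the reducer's imports; below is a minimal sketch of reduce_class (the name comes from the driver further down), assuming it simply emits the patent key and each reconstructed full name as Text. The exact output types are an assumption; the original also imports NullWritable, which would fit a key-only output instead.
// Sketch of reduce_class (body not shown on the original slide)
public class reduce_class extends Reducer<patent, name, Text, Text> {
	@Override
	public void reduce(patent key, Iterable<name> values, Context context)
			throws IOException, InterruptedException {
		// One output line per inventor name grouped under this patent key
		for (name n : values) {
			context.write(new Text(key.toString()), n.getName());
		}
	}
}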
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
// The driver class and job name are not given on the slide; "driver_class" is assumed here
public class driver_class {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "patent names");
		job.setJarByClass(driver_class.class);
		FileInputFormat.setInputPaths(job, args[0]);
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(map_class.class);
		job.setReducerClass(reduce_class.class);
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		// Register our custom key and value classes for the map output
		job.setMapOutputKeyClass(patent.class);
		job.setMapOutputValueClass(name.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
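When the job finishes, Hadoop prints all counters (missing.*, Total.*) to the console. A hedged sketch, assuming you keep the Job handle instead of exiting immediately, of reading them programmatically (variable names are illustrative):
boolean ok = job.waitForCompletion(true);
// Counters are printed automatically at completion; they can also be
// read from the completed Job
long written = job.getCounters().findCounter(Total.WRITTEN).getValue();
long skipped = job.getCounters().findCounter(Total.SKIPPED).getValue();
System.out.println("written=" + written + ", skipped=" + skipped);
System.exit(ok ? 0 : 1);
The job is then submitted as usual, e.g. hadoop jar patentnames.jar driver_class <input> <output> (jar and class names assumed).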
Sample Input