
Merge files stored in a directory into a single file

This program opens every file in a local input directory, reads each one in 256-byte blocks, and appends the bytes to a single output file on HDFS. It uses the Hadoop FileSystem API directly; no MapReduce job is involved.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PutMerge {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);        // destination: HDFS
        FileSystem local = FileSystem.getLocal(conf);  // source: local filesystem
        Path inputDir = new Path(args[0]);             // local input directory
        Path hdfsFile = new Path(args[1]);             // HDFS output file
        try {
            // List every file in the local input directory.
            FileStatus[] inputFiles = local.listStatus(inputDir);
            FSDataOutputStream out = hdfs.create(hdfsFile);
            for (int i = 0; i < inputFiles.length; i++) {
                System.out.println(inputFiles[i].getPath().getName());
                FSDataInputStream in = local.open(inputFiles[i].getPath());
                // Copy the file to HDFS in 256-byte blocks.
                byte[] buffer = new byte[256];
                int bytesRead = 0;
                while ((bytesRead = in.read(buffer)) > 0) {
                    out.write(buffer, 0, bytesRead);
                }
                in.close();
            }
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
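
For comparison, the same merge can be written more compactly with try-with-resources and Hadoop's IOUtils.copyBytes, which handles the buffer loop internally. This is only a sketch under the same two-argument convention (local input directory, then HDFS output file); the class name PutMergeCompact is made up for illustration and is not part of the original listing.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Hypothetical compact variant of PutMerge (for illustration only).
public class PutMergeCompact {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        FileSystem local = FileSystem.getLocal(conf);

        // try-with-resources closes the output stream even if a copy fails.
        try (FSDataOutputStream out = hdfs.create(new Path(args[1]))) {
            for (FileStatus status : local.listStatus(new Path(args[0]))) {
                try (FSDataInputStream in = local.open(status.getPath())) {
                    // copyBytes(in, out, bufferSize, close): pass close = false
                    // so 'out' stays open across files.
                    IOUtils.copyBytes(in, out, 4096, false);
                }
            }
        }
    }
}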
Copy the contents of a file into another file, separating values with a ',' sign

This MapReduce job (older mapred API) reads comma-separated key/value lines with KeyValueTextInputFormat, inverts each (key, value) pair in the mapper, and in the reducer joins all original keys that share a value into one comma-separated string.

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class MyJob extends Configured implements Tool {

    public static class MapClass extends MapReduceBase
            implements Mapper<Text, Text, Text, Text> {
        public void map(Text key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // Invert each (key, value) pair.
            output.collect(value, key);
        }
    }

    public static class Reduce extends MapReduceBase
            implements Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            // Join all values for this key into one comma-separated string.
            String csv = "";
            while (values.hasNext()) {
                if (csv.length() > 0) csv += ",";
                csv += values.next().toString();
            }
            output.collect(key, new Text(csv));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        JobConf job = new JobConf(conf, MyJob.class);
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);
        job.setJobName("MyJob");
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormat(KeyValueTextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Split each input line on the first ',' into key and value.
        job.set("key.value.separator.in.input.line", ",");
        JobClient.runJob(job);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new MyJob(), args);
        System.exit(res);
    }
}
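
The listing above uses the older org.apache.hadoop.mapred API. As a rough sketch of what the same job looks like on the newer org.apache.hadoop.mapreduce API (the class names MyJobNewApi, InvertMapper, and JoinReducer are illustrative, not from the original):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical port of MyJob to the newer mapreduce API (for illustration only).
public class MyJobNewApi {

    public static class InvertMapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, key);  // invert each (key, value) pair
        }
    }

    public static class JoinReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Join all values for this key into one comma-separated string.
            StringBuilder csv = new StringBuilder();
            for (Text v : values) {
                if (csv.length() > 0) csv.append(',');
                csv.append(v.toString());
            }
            context.write(key, new Text(csv.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // New-API name of the key/value separator property.
        conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");
        Job job = Job.getInstance(conf, "MyJobNewApi");
        job.setJarByClass(MyJobNewApi.class);
        job.setMapperClass(InvertMapper.class);
        job.setReducerClass(JoinReducer.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}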

Find the sum and average of salary on the basis of SEX

The mapper emits a (sex, salary) pair for each tab-separated employee record; the reducer totals the salaries in each group and divides by the record count to get the average.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class avgtot {

    public static class MapperClass extends
            Mapper<LongWritable, Text, Text, FloatWritable> {
        public void map(LongWritable key, Text empRecord, Context con)
                throws IOException, InterruptedException {
            // Tab-separated record: column 2 holds sex, column 5 holds salary.
            String[] word = empRecord.toString().split("\\t");
            String sex = word[2];
            try {
                float salary = Float.parseFloat(word[5]);
                con.write(new Text(sex), new FloatWritable(salary));
            } catch (Exception e) {
                e.printStackTrace();  // skip malformed records
            }
        }
    }

    public static class ReducerClass extends
            Reducer<Text, FloatWritable, Text, Text> {
        public void reduce(Text key, Iterable<FloatWritable> valueList,
                Context con) throws IOException, InterruptedException {
            try {
                float total = 0f;
                int count = 0;
                for (FloatWritable var : valueList) {
                    total += var.get();
                    System.out.println("reducer " + var.get());  // debug trace
                    count++;
                }
                float avg = total / count;
                String out = "Total: " + total + " :: " + "Average: " + avg;
                con.write(key, new Text(out));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf, "average total");
            job.setJarByClass(avgtot.class);
            job.setMapperClass(MapperClass.class);
            job.setReducerClass(ReducerClass.class);
            // Map output is (Text, FloatWritable); final output is (Text, Text).
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(FloatWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            Path pathInput = new Path(args[0]);
            Path pathOutputDir = new Path(args[1]);
            FileInputFormat.addInputPath(job, pathInput);
            FileOutputFormat.setOutputPath(job, pathOutputDir);
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
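
To make the record layout the mapper assumes concrete, here is a hypothetical tab-separated input (fields: id, name, sex, designation, department, salary; only columns word[2] and word[5] are actually read) and the output the job would produce for it. The data below is invented purely for illustration:

1001	John	M	Dev	IT	45000.0
1002	Asha	F	QA	IT	52000.0
1003	Ravi	M	Ops	IT	35000.0

Expected output:

F	Total: 52000.0 :: Average: 52000.0
M	Total: 80000.0 :: Average: 40000.0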
