Bda Lab
PROGRAM:
# Return the sum of two numbers (vectorized, like `+` itself).
add <- function(x, y) {
  x + y
}
# Return the difference x - y.
subtract <- function(x, y) {
  x - y
}
# Return the product x * y.
multiply <- function(x, y) {
  x * y
}
# Divide x by y, guarding against division by zero.
# Returns the numeric quotient, or an explanatory string when y == 0
# (note: callers therefore receive mixed return types).
divide <- function(x, y) {
  if (y == 0) {
    return("Cannot divide by zero!")
  }
  x / y
}
# Interactive calculator: read two numbers and an operator, then
# dispatch to the matching arithmetic helper via switch().
num1 <- as.numeric(readline("Enter the first number: "))
num2 <- as.numeric(readline("Enter the second number: "))
operator <- readline("Enter an operator (+, -, *, /): ")
# switch() default branch mirrors the original: warn and keep result 0.
result <- switch(operator,
  "+" = add(num1, num2),
  "-" = subtract(num1, num2),
  "*" = multiply(num1, num2),
  "/" = divide(num1, num2),
  {
    cat("Invalid operator")
    0
  }
)
cat("Result: ", result, "\n")
OUTPUT:
Enter the first number: 3
Enter the second number: 3
Enter an operator (+, -, *, /): +
Result: 6
1
2. Write an R program which takes an integer as input and checks whether the given number is
positive, negative, or zero.
PROGRAM:
# Read an integer and classify it as positive, negative, or zero.
number <- as.integer(readline("Enter a Number:- "))
if (number > 0) {
  print(paste(number, "is the positive number"))
} else if (number < 0) {
  print(paste(number, "is the negative number"))
} else {
  print(paste(number, "is the zero"))
}
OUTPUT:
Enter a Number:- 2
"2 is the positive number"
2
3. Write an R program which takes a year as input and checks whether the given year is a leap
year or not.
PROGRAM:
# Read a year and report whether it is a leap year.
year <- as.integer(readline("Enter a year : "))
# A year is leap when divisible by 4, unless it is a century year,
# in which case it must also be divisible by 400. This single
# expression is equivalent to the usual nested-if formulation.
is_leap <- (year %% 4 == 0 && year %% 100 != 0) || (year %% 400 == 0)
if (is_leap) {
  print(paste(year," is leap year"))
} else {
  print(paste(year," is not leap year"))
}
OUTPUT:
Enter a year : 2024
"2024 is leap year"
3
4. Write a function to print the odd integers in a given vector and count them.
PROGRAM:
# Print the odd elements of `x` (via cat) and return how many there are.
#
# Args:
#   x: an integer/numeric vector.
# Returns:
#   The number of odd elements.
odd_count <- function(x) {
  k <- 0
  for (n in x) {
    if (n %% 2 == 1) {
      cat(n, " ")
      k <- k + 1  # bug fix: the counter was never incremented, so 0 was always returned
    }
  }
  return(k)
}
# Prompt for a vector from stdin and show its odd elements.
print("Enter a Vector : ")
vector <- scan()
cat("The Odd Elements are : ")
odd_count(vector)
OUTPUT:
Enter a Vector :
1: 1
2: 2
3: 3
4: 4
5: 5
6:
Read 5 items
The Odd Elements are :
135
4
5. Write a function to print the even integers in a given vector and count them.
PROGRAM:
# Print the even elements of `x` (via cat) and return how many there are.
#
# Args:
#   x: an integer/numeric vector.
# Returns:
#   The number of even elements.
even_count <- function(x) {
  k <- 0
  for (n in x) {
    if (n %% 2 == 0) {
      cat(n, " ")
      k <- k + 1  # bug fix: the counter was never incremented, so 0 was always returned
    }
  }
  return(k)
}
# Prompt for a vector from stdin and show its even elements.
print("Enter a Vector : ")
vector <- scan()
cat("The Even Elements are : ")
even_count(vector)
OUTPUT:
Enter a Vector :
1: 1
2: 2
3: 3
4: 4
5: 5
6:
Read 5 items
The Even Elements are :
2 4
5
6. Write a function to find the sum of all even numbers in a given vector.
PROGRAM:
# Sum the even elements of a numeric vector.
# (Name kept for compatibility with the existing caller; the local
# accumulator no longer shadows base::sum.)
even_count <- function(x) {
  total <- 0
  for (value in x) {
    if (value %% 2 == 0) {
      total <- total + value
    }
  }
  return(total)
}
# Read a vector from stdin and report the sum of its even elements.
print("Enter a Vector : ")
myVector <- scan()
print(paste("The Sum of Even Numbers in List:- ", even_count(myVector)))
OUTPUT:
Enter a Vector :
1: 1
2: 2
3: 3
4: 4
5: 5
6:
Read 5 items
The Sum of Even Numbers in List:- 6
6
7. Write a function to find the sum of all odd numbers in a given vector.
PROGRAM:
# Sum the odd elements of a numeric vector.
# (Name kept for compatibility with the existing caller; the local
# accumulator no longer shadows base::sum.)
odd_count <- function(x) {
  total <- 0
  for (value in x) {
    if (value %% 2 == 1) {
      total <- total + value
    }
  }
  return(total)
}
# Read a vector from stdin and report the sum of its odd elements.
print("Enter a Vector : ")
myVector <- scan()
print(paste("The Sum of Odd Numbers in List:- ", odd_count(myVector)))
OUTPUT:
Enter a Vector :
1: 1
2: 2
3: 3
4: 4
5: 5
6:
Read 5 items
The Sum of Odd Numbers in List:- 9
7
8. Write a recursive function that calculates the factorial of a given positive integer.
Use an if-else statement to handle the base case and recursive case.
PROGRAM:
# Recursively compute n! for a non-negative integer n.
# Base case: 0! = 1! = 1; otherwise n * (n - 1)!.
factorial <- function(n) {
  if (n == 0 || n == 1) {
    return(1)
  }
  n * factorial(n - 1)
}
# Read a non-negative integer and report its factorial,
# rejecting negative input.
num <- as.integer(readline("Enter a positive integer: "))
if (num >= 0) {
  result <- factorial(num)
  cat("Factorial of", num, "is", result, "\n")
} else {
  cat("Please enter a positive integer.")
}
OUTPUT:
Enter a positive integer: 5
Factorial of 5 is 120
8
9. Write a function that calculates the factorial of a given positive integer using a loop.
Handle the case where the input is 0 or 1 separately.
PROGRAM:
# Iteratively compute n! for a non-negative integer n.
# 0 and 1 are handled up front; otherwise multiply 2..n together.
factorial <- function(n) {
  if (n == 0 || n == 1) {
    return(1)
  }
  prod(seq(2, n))
}
# Read a non-negative integer and report its factorial.
# Bug fix: `num` was used but never read from the user — the
# readline() call was missing from the original listing.
num <- as.integer(readline("Enter a positive integer: "))
if (is.na(num) || num < 0) {
  cat("Please enter a positive integer.")
} else {
  result <- factorial(num)
  cat("Factorial of", num, "is", result, "\n")
}
OUTPUT:
Enter a positive integer: 5
Factorial of 5 is 120
9
10. Write an R Program to Convert List to Vector.
PROGRAM:
# Demonstrate converting a list to an atomic vector.
myList <- list("Apple", "Banana", "Orange", "Grapes")
# unlist() flattens the list into a character vector; the elements
# are unnamed, so dropping names changes nothing here.
myVector <- unlist(myList, use.names = FALSE)
print(myVector)
OUTPUT:
[1] "Apple" "Banana" "Orange" "Grapes"
10
11. Write an R Program to create Recursive List with Student Name, Roll No and
Marks and perform adding and deleting operations.
PROGRAM:
# Build one student record as a named list.
createStudent <- function(name, roll_no, marks) {
  list(Name = name, Roll_No = roll_no, Marks = marks)
}
# Append a student record to the roster and return the new roster.
addStudent <- function(student_list, student) {
  c(student_list, list(student))
}
# Remove the first student whose Roll_No matches `roll_no`.
# Prints a message and returns the list unchanged when no match exists.
#
# Bug fix: the original iterated 1:length(student_list), which yields
# c(1, 0) for an empty list and then errors on student_list[[1]];
# seq_along() iterates zero times in that case.
deleteStudent <- function(student_list, roll_no) {
  for (i in seq_along(student_list)) {
    if (student_list[[i]]$Roll_No == roll_no) {
      return(student_list[-i])
    }
  }
  cat("Student with Roll No", roll_no, "not found.\n")
  return(student_list)
}
# Seed the roster with three students, then exercise add and delete.
studentList <- list()
for (record in list(createStudent("John", 101, 85),
                    createStudent("Alice", 102, 92),
                    createStudent("Bob", 103, 78))) {
  studentList <- addStudent(studentList, record)
}
print("Initial Student List:")
print(studentList)
# Add a fourth student.
newStudent <- createStudent("Eva", 104, 95)
studentList <- addStudent(studentList, newStudent)
print("Student List after Adding Eva:")
print(studentList)
# Remove Alice by roll number.
studentList <- deleteStudent(studentList, 102)
print("Student List after Deleting Alice:")
print(studentList)
OUTPUT:
Initial Student List:
[[1]]
[[1]]$Name
[1] "John"
[[1]]$Roll_No
[1] 101
[[1]]$Marks
[1] 85
11
[[2]]
[[2]]$Name
[1] "Alice"
[[2]]$Roll_No
[1] 102
[[2]]$Marks
[1] 92
[[3]]
[[3]]$Name
[1] "Bob"
[[3]]$Roll_No
[1] 103
[[3]]$Marks
[1] 78
[[1]]$Roll_No
[1] 101
[[1]]$Marks
[1] 85
[[2]]
[[2]]$Name
[1] "Alice"
[[2]]$Roll_No
[1] 102
[[2]]$Marks
[1] 92
[[3]]
[[3]]$Name
[1] "Bob"
12
[[3]]$Roll_No
[1] 103
[[3]]$Marks
[1] 78
[[4]]
[[4]]$Name
[1] "Eva"
[[4]]$Roll_No
[1] 104
[[4]]$Marks
[1] 95
[[1]]$Roll_No
[1] 101
[[1]]$Marks
[1] 85
[[2]]
[[2]]$Name
[1] "Bob"
[[2]]$Roll_No
[1] 103
[[2]]$Marks
[1] 78
[[3]]
[[3]]$Name
[1] "Eva"
[[3]]$Roll_No
[1] 104
[[3]]$Marks
[1] 95
13
12. Create a function that takes a list of numeric vectors as input. The function
should return a new list where each vector has been normalized (scaled to have a
mean of 0 and standard deviation of 1).
PROGRAM:
# Scale a numeric vector to mean 0 and standard deviation 1.
# A constant vector yields NaN everywhere, since sd(vec) == 0.
normalize_vector <- function(vec) {
  (vec - mean(vec)) / sd(vec)
}
# Bug fix: `input_list` and `normalized_result` were never defined in
# the listing. The sample data below matches the recorded OUTPUT, and
# lapply() applies normalize_vector to each vector in the list.
input_list <- list(c(1, 2, 3, 4, 5),
                   c(10, 20, 30, 40, 50),
                   c(0, 0, 0, 0, 0))
normalized_result <- lapply(input_list, normalize_vector)
print("Original list:")
print(input_list)
print("Normalized list:")
print(normalized_result)
OUTPUT:
Original list:
[[1]]
[1] 1 2 3 4 5
[[2]]
[1] 10 20 30 40 50
[[3]]
[1] 0 0 0 0 0
Normalized list:
[[1]]
[1] -1.2649111 -0.6324555 0.0000000 0.6324555 1.2649111
[[2]]
[1] -1.2649111 -0.6324555 0.0000000 0.6324555 1.2649111
[[3]]
[1] NaN NaN NaN NaN NaN
14
13. Create following data with column and row names.
Ravi 78 67 92
Mahesh 56 89 78
Sita 51 81 76
Neha 89 70 50
PROGRAM:
# Bug fix: the task asks for row and column names (and the recorded
# OUTPUT shows them), but the original never set dimnames.
# Data is filled column-wise: Math, Eng, Science for the four students.
student_marks <- matrix(
  c(78, 56, 51, 89, 67, 89, 81, 70, 92, 78, 76, 50),
  nrow = 4, ncol = 3,
  dimnames = list(
    c("Ravi", "Mahesh", "Sita", "Neha"),
    c("Math", "Eng", "Science")
  )
)
print(student_marks)
OUTPUT:
Math Eng Science
Ravi 78 67 92
Mahesh 56 89 78
Sita 51 81 76
Neha 89 70 50
15
14. Write an R script which processes above data and displays the division/grade of
each student.
PROGRAM:
# Marks matrix (filled column-wise: Math, Eng, Science) with
# students as rows and subjects as columns.
student_marks <- matrix(
  c(78, 56, 51, 89, 67, 89, 81, 70, 92, 78, 76, 50),
  nrow = 4, ncol = 3
)
colnames(student_marks) <- c("Math", "Eng", "Science")
rownames(student_marks) <- c("Ravi", "Mahesh", "Sita", "Neha")
print(student_marks)
# Map an average mark to a letter grade using exclusive lower bounds
# (e.g. exactly 90 is a "B", exactly 80 a "C", and so on).
calculate_grade <- function(avg_marks) {
  if (avg_marks > 90) return("A")
  if (avg_marks > 80) return("B")
  if (avg_marks > 70) return("C")
  if (avg_marks > 60) return("D")
  "F"
}
# For each student row, compute the truncated average (%/% keeps the
# integer division of the original, matching the recorded output)
# and print the corresponding grade.
for (student in rownames(student_marks)) {
  marks_row <- student_marks[student, ]
  avg_marks <- sum(marks_row) %/% length(marks_row)
  grade <- calculate_grade(avg_marks)
  print(paste("Student Name:", student, "Average Marks", avg_marks, "Grade", grade))
}
OUTPUT:
Math Eng Science
Ravi 78 67 92
Mahesh 56 89 78
Sita 51 81 76
Neha 89 70 50
[1] "Student Name: Ravi Average Marks 79 Grade C"
[1] "Student Name: Mahesh Average Marks 74 Grade C"
[1] "Student Name: Sita Average Marks 69 Grade D"
[1] "Student Name: Neha Average Marks 69 Grade D"
16
15. Write an R function that takes two matrices as input and checks whether they are equal
(have the same dimensions and corresponding elements).
PROGRAM:
# TRUE when both arguments are matrices of identical dimensions with
# all corresponding elements equal; FALSE otherwise.
are_matrices_equal <- function(mat1, mat2) {
  if (!is.matrix(mat1) || !is.matrix(mat2)) {
    return(FALSE)
  }
  if (!identical(dim(mat1), dim(mat2))) {
    return(FALSE)
  }
  all(mat1 == mat2)
}
# Bug fix: the listing was missing every input-reading line (rowA, colA,
# vecA, rowB, colB, vecB were all undefined) and the final equality
# check shown in the recorded OUTPUT. Reconstructed to mirror the
# matrix-product driver of experiment 16.
print("Enter rows count for Matrix A: ")
rowA <- as.integer(readline())
print("Enter cols count for Matrix A: ")
colA <- as.integer(readline())
print("Enter Elements for Matrix A (row - wise) ")
vecA <- scan()
if ((rowA * colA) != length(vecA)) {
  print("Vector Length and (Rows and Columns) Count not Match")
} else {
  A <- matrix(vecA, nrow = rowA, ncol = colA, byrow = TRUE)
}
print("Enter rows count for Matrix B: ")
rowB <- as.integer(readline())
print("Enter cols count for Matrix B: ")
colB <- as.integer(readline())
print("Enter Elements for Matrix B (row - wise) ")
vecB <- scan()
if ((rowB * colB) != length(vecB)) {
  print("Vector Length and (Rows and Columns) Count not Match")
} else {
  B <- matrix(vecB, nrow = rowB, ncol = colB, byrow = TRUE)
}
print("Matrix A:")
print(A)
print("Matrix B:")
print(B)
cat("Are matrix A and matrix B equal?", are_matrices_equal(A, B), "\n")
OUTPUT:
[1] "Enter rows count for Matrix A: "
2
[1] "Enter cols count for Matrix A: "
2
[1] "Enter Elements for Matrix A (row - wise) "
1: 1
2: 1
3: 1
4: 1
5:
Read 4 items
[1] "Enter rows count for Matrix B: "
2
[1] "Enter cols count for Matrix B: "
2
[1] "Enter Elements for Matrix B (row - wise) "
> vecB = scan()
1: 1
2: 1
3: 1
4: 1
5:
Read 4 items
[1] "Matrix A:"
[,1] [,2]
[1,] 1 1
[2,] 1 1
[1] "Matrix B:"
[,1] [,2]
[1,] 1 1
[2,] 1 1
Are matrix A and matrix B equal? TRUE
18
16. You have two matrices, A and B, both of size 3x3. Write a program that
computes the matrix product of A and B without using the %*% operator.
PROGRAM:
# Multiply two matrices without using %*%.
# Each result cell (i, j) is the dot product of row i of A and
# column j of B; stops when the inner dimensions do not agree.
matrixProduct <- function(A, B) {
  if (ncol(A) != nrow(B)) {
    stop("Number of columns in matrix A must be equal to the number of rows in matrix B.")
  }
  rows <- nrow(A)
  cols <- ncol(B)
  result <- matrix(0, rows, cols)
  for (i in seq_len(rows)) {
    for (j in seq_len(cols)) {
      result[i, j] <- sum(A[i, ] * B[, j])
    }
  }
  result
}
# Read the dimensions and (row-wise) elements of matrix A.
print("Enter rows count for Matrix A: ")
rowA <- as.integer(readline())
print("Enter cols count for Matrix A: ")
colA <- as.integer(readline())
print("Enter Elements for Matrix A (row - wise) ")
vecA <- scan()
# Only build the matrix when the element count matches the shape.
if ((rowA * colA) != length(vecA)) {
  print("Vector Length and (Rows and Columns) Count not Match")
} else {
  A <- matrix(vecA, nrow = rowA, ncol = colA, byrow = TRUE)
}
# Read the dimensions and (row-wise) elements of matrix B.
print("Enter rows count for Matrix B: ")
rowB <- as.integer(readline())
print("Enter cols count for Matrix B: ")
colB <- as.integer(readline())
print("Enter Elements for Matrix B (row - wise) ")
vecB <- scan()
if ((rowB * colB) != length(vecB)) {
  print("Vector Length and (Rows and Columns) Count not Match")
} else {
  B <- matrix(vecB, nrow = rowB, ncol = colB, byrow = TRUE)
}
19
# Display both input matrices and their product computed without %*%.
print("Matrix A:")
print(A)
print("Matrix B:")
print(B)
print("MatrixProduct A*B : ")
print(matrixProduct(A, B))
OUTPUT:
[1] "Enter rows count for Matrix A: "
2
[1] "Enter cols count for Matrix A: "
2
[1] "Enter Elements for Matrix A (row - wise) "
> vecA = scan()
1: 1
2: 2
3: 3
4: 4
5:
Read 4 items
20
17. Write a Java Hadoop Code to run a MapReduce job for a word count
application.
PROGRAM:
WC_Mapper.java
package org.bhavani;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Word-count mapper (old mapred API): emits (word, 1) for every
 * whitespace-separated token of each input line.
 */
public class WC_Mapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, IntWritable> {

    // Constant count of 1 and a reusable Text to avoid per-token allocation.
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            output.collect(word, one);
        }
    }
} // bug fix: the class's closing brace was missing in the original listing
WC_Reducer.java
package org.bhavani;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Word-count reducer (old mapred API): sums the 1s emitted per word.
 *
 * Bug fix: the class declaration was missing from the original listing,
 * leaving reduce() floating outside any class.
 */
public class WC_Reducer extends MapReduceBase implements
        Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
        int sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}
WC_Runner.java
package org.bhavani;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Driver for the word-count job (old mapred API).
 * args[0] = input path, args[1] = output path.
 */
public class WC_Runner {
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(WC_Runner.class);
        jobConf.setJobName("WordCount");
        // Key/value types emitted by the mapper and reducer.
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(IntWritable.class);
        jobConf.setMapperClass(WC_Mapper.class);
        // Summing is associative and commutative, so the reducer
        // doubles as a combiner.
        jobConf.setCombinerClass(WC_Reducer.class);
        jobConf.setReducerClass(WC_Reducer.class);
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));
        JobClient.runJob(jobConf);
    }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build file for the WordCount MapReduce job (experiment 17).
     Bug fix: a stray page-number line ("22") inside the <project>
     element made the original XML malformed; it has been removed. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.bhavani</groupId>
<artifactId>WordCount</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile for Java 8, the baseline for Hadoop 3.2.x -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Hadoop client-side APIs required to compile the job classes -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.2.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>3.2.3</version>
</dependency>
</dependencies>
</project>
INPUT:
input.txt
Shiva sai
Bhavani
Sekhar
Supriya
sai
sai
Shiva
Sekhar
Bhavani
Shakeel
Supriya
23
OUTPUT:
24
18. Write a Java Hadoop Code to run a MapReduce job for a Maximum
Temperature application.
PROGRAM:
Max_temp.java
package org.bhavani2;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Maximum-temperature job: each input line is "<year> <temperature>";
 * the job emits the highest temperature observed per year.
 *
 * Fixes vs. the original listing: a stray extra closing brace was
 * removed, and the missing driver (main) was added.
 */
public class Max_temp {

    /** Mapper: parses "<year> <temp>" lines and emits (year, temp). */
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text year = new Text();
        private IntWritable temperature = new IntWritable();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            if (tokenizer.hasMoreTokens()) {
                // Assuming the first token is the year
                String yearStr = tokenizer.nextToken();
                year.set(yearStr);
                // Check if there is a temperature value
                if (tokenizer.hasMoreTokens()) {
                    String tempStr = tokenizer.nextToken().trim();
                    try {
                        int temp = Integer.parseInt(tempStr);
                        temperature.set(temp);
                        context.write(year, temperature);
                    } catch (NumberFormatException e) {
                        // Malformed rows are logged and skipped.
                        System.err.println("Error parsing temperature: " + tempStr);
                    }
                } else {
                    System.err.println("Missing temperature value for year: " + yearStr);
                }
            } else {
                System.err.println("Empty or invalid input line: " + line);
            }
        }
    }

    /** Reducer: keeps the maximum temperature seen for each year. */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Bug fix: starting at 0 would report 0 for a year whose
            // readings are all negative; MIN_VALUE is a safe identity.
            int maxtemp = Integer.MIN_VALUE;
            for (IntWritable it : values) {
                int temperature = it.get();
                if (maxtemp < temperature) {
                    maxtemp = temperature;
                }
            }
            context.write(key, new IntWritable(maxtemp));
        }
    }

    /**
     * Driver (missing from the original listing): wires the mapper and
     * reducer into a job. args[0] = input path, args[1] = output path.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "max temperature");
        job.setJarByClass(Max_temp.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
26
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build file for the Maximum Temperature MapReduce job
     (experiment 18). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.bhavani2</groupId>
<!-- NOTE(review): artifactId still says "WordCount" — apparently
     copy-pasted from experiment 17; consider renaming to MaxTemperature. -->
<artifactId>WordCount</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile for Java 8, the baseline for Hadoop 3.2.x -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Hadoop client-side APIs required to compile the job classes -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.2.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>3.2.3</version>
</dependency>
</dependencies>
</project>
INPUT:
input.txt
1900 39
1900 14
1900 5
1900 11
1901 48
1901 21
1901 13
1902 49
27
1902 1
1902 24
1903 35
1903 35
1903 18
1904 29
1904 23
1904 28
1904 46
OUTPUT:
28
19. Write a Java Hadoop Code to run a MapReduce job for Sales by Country
application.
PROGRAM:
Sales.java
package org.bhavani3;
import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Sales-by-country job: counts products sold and totals sale prices
 * per country.
 *
 * NOTE(review): the original listing was garbled — the mapper and
 * reducer bodies were missing and the driver code was spliced into the
 * Map class. Reconstructed below. The CSV column indexes assume the
 * classic SalesJan2009.csv layout (price = column 2, country = column 7)
 * — confirm against the actual input.csv.
 */
public class Sales {

    /** Writable pairing a product count with a price total for a country. */
    public static class CountrySalesStatsWritable implements Writable {
        private IntWritable productCount;
        private LongWritable priceSum;

        // Default constructor (required by Hadoop serialization).
        public CountrySalesStatsWritable() {
            this.productCount = new IntWritable();
            this.priceSum = new LongWritable();
        }

        // Custom constructor.
        public CountrySalesStatsWritable(IntWritable productCount, LongWritable priceSum) {
            this.productCount = productCount;
            this.priceSum = priceSum;
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            productCount.readFields(in);
            priceSum.readFields(in);
        }

        @Override
        public void write(DataOutput out) throws IOException {
            productCount.write(out);
            priceSum.write(out);
        }

        @Override
        public String toString() {
            // Tab-separated so the two numbers remain distinguishable in
            // the job output (the original joined them with "").
            return productCount.toString() + "\t" + priceSum.toString();
        }
    }

    /** Mapper: emits (country, price) for each well-formed sales record. */
    public static class Map extends Mapper<LongWritable, Text, Text, LongWritable> {
        private Text country = new Text();
        private LongWritable price = new LongWritable();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length > 7) {
                try {
                    price.set(Long.parseLong(fields[2].trim()));
                    country.set(fields[7].trim());
                    context.write(country, price);
                } catch (NumberFormatException e) {
                    // Skip the header row and malformed records.
                }
            }
        }
    }

    /** Reducer: counts records and totals prices per country. */
    public static class Reduce extends
            Reducer<Text, LongWritable, Text, CountrySalesStatsWritable> {
        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            long total = 0L;
            for (LongWritable salePrice : values) {
                count++;
                total += salePrice.get();
            }
            context.write(key, new CountrySalesStatsWritable(
                    new IntWritable(count), new LongWritable(total)));
        }
    }

    /** Driver. args[0] = input path, args[1] = output path. */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The job name is only a label shown in the cluster UI.
        Job job = Job.getInstance(conf, "sales");
        job.setJarByClass(Sales.class);
        // Mapper output types differ from the reducer's output types,
        // so both pairs must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CountrySalesStatsWritable.class);
        job.setMapperClass(Map.class);
        // Don't reuse the Reducer as a Combiner: its input and output
        // value types do not match.
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build file for the Sales-by-Country MapReduce job
     (experiment 19). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.bhavani3</groupId>
<!-- NOTE(review): artifactId still says "WordCount" — apparently
     copy-pasted from experiment 17; consider renaming to Sales. -->
<artifactId>WordCount</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile for Java 8, the baseline for Hadoop 3.2.x -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Hadoop client-side APIs required to compile the job classes -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.2.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>3.2.3</version>
</dependency>
</dependencies>
</project>
31
INPUT:
input.csv
OUTPUT:
32
20. Implement a simple map-reduce job that builds an inverted index on the set of
input documents (Hadoop)
PROGRAM:
InvertedIndexMapper.java
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Inverted-index mapper: each input line is "<docId> <word> <word> ...";
 * emits (word, docId) for every word on the line.
 */
public class InvertedIndexMapper extends Mapper<LongWritable, Text, Text, Text> {

    // Reusable output key/value holders to avoid per-record allocation.
    private Text term = new Text();
    private Text docIdText = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] tokens = value.toString().split("\\s+");
        // A valid line has a document id followed by at least one word.
        if (tokens.length < 2) {
            return; // Skip lines with no content
        }
        docIdText.set(tokens[0]);
        for (int i = 1; i < tokens.length; i++) {
            term.set(tokens[i]);
            context.write(term, docIdText);
        }
    }
}
InvertedIndexReducer.java
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
 * Inverted-index reducer: collects the distinct document ids for each
 * term and emits them as one comma-separated postings string.
 */
public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {

    private Text postings = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // HashSet de-duplicates ids when a term occurs several times
        // in the same document.
        Set<String> docIds = new HashSet<>();
        for (Text docId : values) {
            docIds.add(docId.toString());
        }
        postings.set(String.join(",", docIds));
        context.write(key, postings);
    }
}
33
InvertedIndexDriver.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
INPUT:
1.txt
doc1 Hello world
2.txt
doc2 Hadoop is great
3.txt
doc3 Hello Hadoop
34
OUTPUT:
35
21. Use R-Project to carry out statistical analysis of big data
PROGRAM:
# Statistical analysis of the built-in iris dataset.
# NOTE(review): installing at run time is unusual; typically done once
# outside the script.
install.packages("ggplot2")
# Load the dataset
data(iris)
# Display the first few rows of the dataset
head(iris)
# Summary statistics of the dataset
summary(iris)
# Mean of each numeric variable, grouped by species
aggregate(. ~ Species, data = iris, mean)
# Plotting the data: scatter plot of sepal dimensions, coloured by species
library(ggplot2)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
geom_point() +
labs(title = "Iris Dataset",
x = "Sepal Length",
y = "Sepal Width")
# Boxplot of the four measurement columns.
# NOTE(review): the boxes are one per measurement, not per species, so
# the "by Species" title and "Species" xlab look misleading — confirm intent.
boxplot(iris[, 1:4],
main = "Boxplot of Iris Dataset by Species",
xlab = " Species",
ylab = " Measurement",
col = c("skyblue", "lightgreen", "salmon"),
names = c("Sepal Length", "Sepal width", "Petal Length", "Petal width"))
OUTPUT:
> head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
> summary(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
Median :5.800 Median :3.000 Median :4.350 Median :1.300
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
Species
setosa :50
36
versicolor:50
virginica :50
37
22. Use R-Project for data visualization of social media data
PROGRAM:
# Visualize social-media engagement from a CSV export.
# NOTE(review): run-time installs are unusual; typically done once
# outside the script.
install.packages("ggplot2")
install.packages("dplyr")
install.packages("readr")
library(readr)
# NOTE(review): hard-coded absolute Windows path — adjust per machine.
social_media_data <-
read_csv("E:\\social media data.csv")
library(dplyr)
# Numeric summaries and a column-type overview of the data
summary(social_media_data)
glimpse(social_media_data)
library(ggplot2)
# Line chart of likes over time, one line per platform
ggplot(social_media_data, aes(x = date, y = likes, color = platform, group = platform)) +
geom_line() + labs(title= "Likes Over Time", x = "Date", y = "Number of Likes") +
theme_minimal()
# Save the most recent plot to the working directory
ggsave("likes_over_time.png")
INPUT:
social media data.csv
38
OUTPUT:
> summary(social_media_data)
platform likes comments shares
Length:20 Min. :1.200 Min. : 7.00 Min. : 1.00
Class :character 1st Qu.:1.775 1st Qu.:14.75 1st Qu.: 4.75
Mode :character Median :2.500 Median :25.50 Median : 9.50
Mean :2.820 Mean :31.60 Mean :13.10
3rd Qu.:3.650 3rd Qu.:39.25 3rd Qu.:17.00
Max. :5.400 Max. :90.00 Max. :46.00
date
Length:20
Class :character
Mode :character
> glimpse(social_media_data)
Rows: 20
Columns: 5
$ platform <chr> "youtube", "instagram", "twitter", "openid", "facebook", "t…
$ likes <dbl> 2.3, 2.3, 2.3, 5.4, 1.5, 2.0, 1.5, 2.7, 1.2, 1.7, 4.6, 4.1,…
$ comments <dbl> 50, 15, 53, 90, 14, 20, 14, 40, 19, 17, 33, 33, 22, 11, 10,…
$ shares <dbl> 34, 9, 6, 26, 15, 4, 12, 7, 15, 2, 5, 46, 3, 23, 11, 2, 6, …
$ date <chr> "3/6/2023", "3/7/2023", "3/8/2023", "3/9/2023", "3/10/2024"…
39