Lab01
Lab01
IE-494
Vansh Joshi
202101445
IE-494 Lab-1
❖ Exercise.1: Understand all solved problems here. Each problem has one Map
and one Reduce function. You need to document the following for both functions
in each problem:
1. Input parameters: Key and Value with one-line description about each
parameter. Description typically tells what data this parameter carries.
2. Output: Key and Value with one-line description about each output.
Description typically tells what data each output emits.
• Output parameters: It returns each word together with the total of its
frequencies.
For example: Horse : 3
1
IE-494 Lab-1
❖ Exercise.2:
• Colab Link:
• Source Code:
1.
import re
from mrjob.job import MRJob


class EmpSum(MRJob):
    """MapReduce job whose mapper emits one (dno, salary) pair per record.

    NOTE(review): only the mapper is present in this listing; the summation
    suggested by the class name would need a reducer -- confirm against the
    original notebook.
    """

    def mapper(self, key, line):
        """Emit the ``dno`` field and the integer salary of one record.

        Input:
            key: not used by this mapper.
            line: one input record as comma-separated text.

        Output:
            key: field at index 2 of the record (``dno``; presumably the
                department number -- confirm against the data set).
            value: field at index 3 of the record converted to ``int``
                (``salary``).
        """
        # A plain split is enough for a literal one-character delimiter.
        record = line.split(',')
        dno = record[2]
        salary = int(record[3])
        yield dno, salary


if __name__ == '__main__':
    EmpSum.run()
2.
%%file empsumma.py
from mrjob.job import MRJob
import re
class EmpMaxSalary(MRJob):
    """MapReduce job whose mapper emits (dno, salary) for 'MA' records only.

    NOTE(review): only the mapper is present in this listing; the maximum
    suggested by the class name would need a reducer -- confirm against the
    original notebook.
    """

    def mapper(self, _, line):
        """Emit the ``dno`` field and integer salary of records with state 'MA'.

        Input:
            _: input key, not used by this mapper.
            line: one input record as comma-separated text.

        Output (only when the field at index 4 equals ``'MA'``):
            key: field at index 2 of the record (``dno``; presumably the
                department number -- confirm against the data set).
            value: field at index 3 of the record converted to ``int``
                (``salary``).
        """
        record = line.split(',')
        state = record[4]
        # Records from any other state produce no output at all.
        if state == 'MA':
            dno = record[2]
            salary = int(record[3])
            yield dno, salary
2
IE-494 Lab-1
# Run the job only when this file is executed as a script.
if __name__ == '__main__':
    EmpMaxSalary.run()
3.
%%file empavg.py
from mrjob.job import MRJob
import re
class EmpAvgSalary(MRJob):
    """MapReduce job whose mapper emits one (dno, salary) pair per record.

    NOTE(review): only the mapper is present in this listing; the average
    suggested by the class name would need a reducer -- confirm against the
    original notebook.
    """

    def mapper(self, _, line):
        """Emit the ``dno`` field and the integer salary of one record.

        Input:
            _: input key, not used by this mapper.
            line: one input record as comma-separated text.

        Output:
            key: field at index 2 of the record (``dno``; presumably the
                department number -- confirm against the data set).
            value: field at index 3 of the record converted to ``int``
                (``salary``).
        """
        record = line.split(',')
        dno = record[2]
        salary = int(record[3])
        yield dno, salary


if __name__ == '__main__':
    EmpAvgSalary.run()
4.
%%file empsalary_4.py
from mrjob.job import MRJob
import re
class EmpSalary(MRJob):
    """MapReduce job whose mapper passes through records matching a filter.

    A record matches when its ``dno`` field equals 5 and its salary field is
    greater than 100000.
    """

    def mapper(self, _, line):
        """Emit the whole record when it satisfies the dno/salary filter.

        Input:
            _: input key, not used by this mapper.
            line: one input record as comma-separated text.

        Output (only for matching records):
            key: ``None``.
            value: the full record as a list of its comma-separated fields
                (all fields remain strings).
        """
        record = line.split(',')
        dno = int(record[2])
        salary = int(record[3])
        if dno == 5 and salary > 100000:
            yield None, record  # Yield the entire record if it matches


if __name__ == '__main__':
    EmpSalary.run()
5.
%%file emp_5.py
3
IE-494 Lab-1
from mrjob.job import MRJob


class EmpCountByGender(MRJob):
    """MapReduce job whose mapper emits ((dno, gender), 1) per record.

    NOTE(review): only the mapper is present in this listing; the counting
    suggested by the class name would need a reducer -- confirm against the
    original notebook.
    """

    def mapper(self, key, line):
        """Emit a count of 1 keyed by the record's (dno, gender) pair.

        Input:
            key: not used by this mapper.
            line: one input record as comma-separated text.

        Output:
            key: tuple of the fields at index 2 (``dno``) and index 5
                (``gender``) of the record.
            value: the constant ``1``.
        """
        # str.split avoids needing the ``re`` module, which this snippet
        # never imported.
        record = line.split(',')
        dno = record[2]
        gender = record[5]
        yield (dno, gender), 1  # Yield a tuple of (dno, gender) as key


if __name__ == '__main__':
    EmpCountByGender.run()
6.
%%file monthly_summary.py
from mrjob.job import MRJob
import re
class MonthlySummary(MRJob):
4
IE-494 Lab-1
# Run the job only when this file is executed as a script.
if __name__ == '__main__':
    MonthlySummary.run()
7.
%%file error_counter.py
from mrjob.job import MRJob
class Error404TimeStamps(MRJob):
# Run the job only when this file is executed as a script.
if __name__ == '__main__':
    Error404TimeStamps.run()
--X –