RDD Task1
RDD Task1
ipynb - Colab
# Build an RDD from an in-memory Python list of (str, int) tuples and
# collect it back to the driver.
# The list is bound to `members` instead of `list` so the builtin `list`
# type is not shadowed for the rest of the notebook session.
# `sc` is presumably the SparkContext supplied by the notebook runtime.
members = [
    ('JK', 22),
    ('V', 24),
    ('Jimin', 24),
    ('RM', 25),
    ('J-Hope', 25),
    ('Suga', 26),
    ('Jin', 27),
]
rdd1 = sc.parallelize(members)
rdd1.collect()
[('JK', 22),
('V', 24),
('Jimin', 24),
('RM', 25),
('J-Hope', 25),
('Suga', 26),
('Jin', 27)]
Read the sample1.txt file into an RDD and display the first 2 elements
# Load a text file into an RDD. `sc` is presumably the SparkContext supplied
# by the notebook runtime.
# NOTE(review): the task heading names sample1.txt but this reads
# 'example.txt' — confirm which file is intended.
rdd2 = sc.textFile('example.txt')
# Fetch the first two elements of the RDD.
# NOTE(review): if these statements share one notebook cell, only the last
# expression's value is displayed, so this result would not be shown —
# confirm the cell boundaries.
rdd2.take(2)
# Count the number of elements in the RDD.
rdd2.count()
['Centre',
'for',
'Speech',
'and',
'Language',
'Therapy',
'and',
'Hearing',
'Science',
'Center',
'for',
'Rehabilitative',
'Auditory',
'Research',
'Department',
'of',
'Hearing',
'and',
'Speech',
'Science']
file:///C:/Users/abdo1/Downloads/RDD_Task_t.html 1/2
12/7/24, 10:42 PM RDD_Task.ipynb - Colab
['Department']
# Two small pair RDDs of (key, value) tuples; keys 'a' and 'b' appear in
# both, 'c' appears only in rdd4.
rdd3 = sc.parallelize([('a',2),('b',3)])
rdd4 = sc.parallelize([('a',9),('b',7),('c',10)])
# Join the two RDDs on their keys and collect the result to the driver.
# Per the PySpark RDD.join contract only keys present in both RDDs are kept,
# so 'c' should not appear in the result.
# NOTE(review): no output is captured for this cell to confirm.
rdd3.join(rdd4).collect()
file:///C:/Users/abdo1/Downloads/RDD_Task_t.html 2/2