0% found this document useful (0 votes)
6 views2 pages

Azure Code

Uploaded by

dudekulahassan
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views2 pages

Azure Code

Uploaded by

dudekulahassan
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 2

emp = [(1, "Smith",-1,"2018","10", "M",3000), \

(2, "Rose", 1, "2010","20", "Μ",4000), \

(3, "Williams", 1, "2010","10", "Μ",1000), \

(4, "Jones", 2, "2005","10","F", 2000), \

(5, "Brown", 2, "2010","40","",-1),\

(6, "Brown", 2, "2010", "50","",-1)\


]

empColumns = ["emp_id", "name","superior_emp_id", "year_joined", \

"emp_dept_id", "gender", "salary"]

empDF = spark.createDataFrame(data=emp, schema = empColumns)

empDF.printSchema()

display (empDF)

dept = [("Finance",10), \

("Marketing", 20), \

("Sales", 30), \

("IT",40) \

deptColumns = ["dept_name","dept_id"]

deptDF = spark.createDataFrame (data=dept, schema = deptColumns)

deptDF.printSchema()

display(deptDF)

empDF.createOrReplaceTempView("EMP")

deptDF.createOrReplaceTempView("DEPT")

joinDF spark.sql("select from EMP e, DEPT d where e.emp_dept_id == d.dept_id") \


.show(truncate=False)

syntax: select from emp e inner join dept d where e.id == d.id sql server = select
from emp e inner join dept d on e.id == d.id

joinDF2 = spark.sql("select * from EMP e left JOIN DEPT d ON e.emp_dept_id ==


d.dept_id") \

.show(truncate=False)

syntax for joins

innerjoin = empDF.join(deptDF, empDF.emp_dept_id == deptDF.deptDF.dept_id,"inner")


display(innerjoin)

leftjoin = empDF.join(deptDF, empDF.emp_dept_id == deptDF.deptDF.dept_id,"left")


display(leftjoin)

rightjoin = empDF.join(deptDF, empDF.emp_dept_id == deptDF.deptDF.dept_id,"right")


display(rightjoin)

fullouterjoin = empDF.join(deptDF, empDF.emp_dept_id ==


deptDF.deptDF.dept_id,"outer")
display(fullouterjoin)

leftsemi = empDF.join(deptDF, empDF.emp_dept_id ==


deptDF.deptDF.dept_id,"leftsemi")
display(leftsemi)

You might also like