# PySpark program to add a new field to a nested struct column of a PySpark data frame
# Import SparkSession along with the required types and functions:
# StructType, StructField, StringType, IntegerType, col, lit, when
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.functions import col, lit, when
from pyspark.sql import SparkSession
# Create a Spark session using the getOrCreate() function
spark_session = SparkSession.builder.getOrCreate()
# Define the data set
data_set = [(('Vansh', 'Rai'), '2000-21-02', 'Male', 13000),
            (('Ria', 'Kapoor'), '2004-01-06', 'Female', 10000)]
# Define the structure for the data frame
schema = StructType([
    StructField('Full_Name', StructType([
        StructField('First_Name', StringType(), True),
        StructField('Last_Name', StringType(), True)])),
    StructField('Date_Of_Birth', StringType(), True),
    StructField('Gender', StringType(), True),
    StructField('Fees', IntegerType(), True)])
# Create the PySpark data frame using the createDataFrame() function
df = spark_session.createDataFrame(data=data_set,
                                   schema=schema)
# Add a Middle_Name field inside the nested Full_Name struct using
# withField(), choosing its value with a when()/otherwise() condition
updated_df = df.withColumn("Full_Name",
                           col("Full_Name").withField("Middle_Name",
                               when(col("Gender") == "Male",
                                    lit("Singh")).otherwise(lit("Kaur"))))
# Display the updated data frame
updated_df.show()
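# Note: Column.withField() used above is available only in Spark 3.1 and
# later. The snippet below is a rough sketch of an equivalent approach for
# older versions, rebuilding the struct explicitly with struct(); the name
# rebuilt_df is introduced here purely for illustration.
from pyspark.sql.functions import struct

rebuilt_df = df.withColumn("Full_Name",
                           struct(col("Full_Name.First_Name").alias("First_Name"),
                                  when(col("Gender") == "Male",
                                       lit("Singh")).otherwise(lit("Kaur")).alias("Middle_Name"),
                                  col("Full_Name.Last_Name").alias("Last_Name")))
rebuilt_df.show()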