0% found this document useful (0 votes)
14 views1 page

Multiclass Classification

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
14 views1 page

Multiclass Classification

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 1

# Multiclass classification of the Iris dataset with a PySpark ML pipeline:
# assemble features -> index labels -> standardize -> logistic regression.
from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import StandardScaler, StringIndexer, VectorAssembler
from pyspark.sql import SparkSession

# Initialize SparkSession
spark = SparkSession.builder.appName("IrisClassification").getOrCreate()

try:
    # Load the Iris dataset. The file must have a header row; the code below
    # reads the columns sepal_length, sepal_width, petal_length, petal_width
    # and species from it.
    df = spark.read.csv("iris.csv", header=True, inferSchema=True)

    # Prepare the data: pack the four measurements into a single vector
    # column and map the species strings to numeric label indices.
    feature_columns = [
        "sepal_length",
        "sepal_width",
        "petal_length",
        "petal_width",
    ]
    assembler = VectorAssembler(inputCols=feature_columns, outputCol="features")
    indexer = StringIndexer(inputCol="species", outputCol="label")

    # Apply scaling: center each feature and scale it by its standard
    # deviation before it reaches the classifier.
    scaler = StandardScaler(
        inputCol="features",
        outputCol="scaledFeatures",
        withMean=True,
        withStd=True,
    )

    # Set up the classifier on the scaled features.
    lr = LogisticRegression(featuresCol="scaledFeatures", labelCol="label")

    # Create a pipeline; stage order matters because the scaler consumes the
    # assembler's output and the classifier consumes the scaler's output.
    pipeline = Pipeline(stages=[assembler, indexer, scaler, lr])

    # Train the model
    model = pipeline.fit(df)

    # Make predictions.
    # NOTE: predictions are made on the same rows the model was fitted on, so
    # the accuracy below is a training accuracy, not a held-out estimate.
    predictions = model.transform(df)

    # Evaluate the model
    evaluator = MulticlassClassificationEvaluator(
        labelCol="label",
        predictionCol="prediction",
        metricName="accuracy",
    )
    accuracy = evaluator.evaluate(predictions)
    print(f"Accuracy: {accuracy}")
finally:
    # Release the Spark session even if loading, fitting or evaluation fails.
    spark.stop()

You might also like