Data analytics assignment solutions
Assignment 1 Set B Q.1
Create a 'User' dataset having 5 columns, namely: User ID, Gender,
Age, Estimated Salary and Purchased. Build a logistic regression
model that can predict, from the given parameters, whether a person
will buy a car or not.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
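Only the imports are shown above; the rest of the workflow is sketched below as a minimal, hedged example. The file name user_data.csv is an assumption, and the column names are taken literally from the question.
# Load the dataset (the file name 'user_data.csv' is an assumption)
user_data = pd.read_csv('user_data.csv')
# Encode Gender numerically (assumes the labels are 'Male' and 'Female')
user_data['Gender'] = user_data['Gender'].map({'Male': 0, 'Female': 1})
# Features and target; User ID is dropped because it carries no predictive signal
X = user_data[['Gender', 'Age', 'Estimated Salary']]
y = user_data['Purchased']
# 70:30 train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
# Fit the logistic regression model (max_iter raised to help convergence on unscaled salary values)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
# Evaluate on the held-out set
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))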
Assignment 1 Set B Q.2
Use the iris dataset. Write a Python program to view some basic
statistical details like percentile, mean, std etc. of the species
'Iris-setosa', 'Iris-versicolor' and 'Iris-virginica'. Apply logistic
regression on the dataset to identify different species (setosa,
versicolor, virginica) of Iris flowers given just 4 features: sepal and
petal lengths and widths. Find the accuracy of the model.
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the iris dataset into a DataFrame
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
# View basic statistical details for each species
print('Iris-setosa statistics:')
print(df[df['target'] == 0].describe())
print('Iris-versicolor statistics:')
print(df[df['target'] == 1].describe())
print('Iris-virginica statistics:')
print(df[df['target'] == 2].describe())
# Fit a logistic regression model on a 70:30 split
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3, random_state=42)
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
Assignment 2 Set B Q.1
Create your own transactions dataset and apply the Apriori association
rule mining process on your dataset.
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
transactions = [
['apple', 'banana', 'orange', 'grape'],
['apple', 'banana', 'grape'],
['apple', 'orange'],
['banana', 'orange', 'grape'],
['apple', 'banana', 'orange', 'kiwi'],
['orange', 'kiwi'],
['apple', 'banana', 'kiwi'],
['orange', 'grape', 'kiwi'],
['apple', 'orange', 'grape', 'kiwi'],
['apple', 'banana', 'orange', 'grape', 'kiwi']
]
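The encoding and rule-mining steps are not shown above, so here is a minimal sketch using the imports already listed; the support and confidence thresholds are illustrative choices, not values fixed by the assignment.
# One-hot encode the transactions into a boolean DataFrame
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)
# Mine frequent itemsets (minimum support of 0.3 is an illustrative threshold)
frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)
print(frequent_itemsets)
# Generate association rules (minimum confidence of 0.6 is an illustrative threshold)
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.6)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])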
import re
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
# tokenize sentences
sentences = sent_tokenize(text)
# score each sentence by the sum of its TF-IDF weights
vectorizer = TfidfVectorizer(stop_words='english')
scores = vectorizer.fit_transform(sentences).sum(axis=1)
# keep the highest-scoring sentences, in original order, as the summary
# (keeping 2 sentences here is an illustrative choice)
top = sorted(range(len(sentences)), key=lambda i: scores[i, 0], reverse=True)[:2]
summary = [sentences[i] for i in sorted(top)]
# print summary
print("Summary:")
for sentence in summary:
    print(sentence)
Program 8
text = ("So, keep working. Keep striving. Never give up. Fall down "
        "seven times, get up eight. Ease is a greater threat to progress than "
        "hardship. Ease is a greater threat to progress than hardship. So, "
        "keep moving, keep growing, keep learning. See you at work.")
# Remove special characters and digits (periods are kept so sentences can still be split)
processed_text = re.sub('[^A-Za-z. ]+', ' ', text)
print(processed_text)
# Tokenize sentences
sentences = sent_tokenize(processed_text)
# Remove stopwords
from nltk.corpus import stopwords  # import needed for the stopword list
stop_words = set(stopwords.words('english'))
filtered_sentences = []
for sentence in sentences:
    words = sentence.split()
    filtered_words = [word for word in words if word.lower() not in stop_words]
    filtered_sentence = ' '.join(filtered_words)
    filtered_sentences.append(filtered_sentence)
print(filtered_sentences)
plt.show()
# remove stopwords
stop_words = set(stopwords.words('english'))
filtered_words = [word for word in words if word.casefold() not in stop_words]
Program 10
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
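Only Program 10's imports survive above; a minimal sketch of the intended flow follows, assuming a hypothetical reviews.csv file with a 'review' text column (both the file name and the column name are placeholders).
nltk.download('vader_lexicon')  # lexicon required by the VADER analyzer
# Load the review text (file and column names are assumptions)
df = pd.read_csv('reviews.csv')
# Score each review with VADER; the compound score lies in [-1, 1]
sia = SentimentIntensityAnalyzer()
df['compound'] = df['review'].apply(lambda r: sia.polarity_scores(r)['compound'])
df['sentiment'] = df['compound'].apply(lambda c: 'positive' if c >= 0 else 'negative')
print(df['sentiment'].value_counts())
# Word cloud built from all review text
wc = WordCloud(width=800, height=400, background_color='white').generate(' '.join(df['review']))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()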
Program 11
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
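Program 11 again lists only imports; a minimal stopword-removal sketch is shown below, where the sample sentence is a placeholder.
nltk.download('punkt')      # tokenizer models
nltk.download('stopwords')  # stopword list
# Placeholder sentence for illustration
text = "NLTK makes it easy to remove common stopwords from a sentence."
# Tokenize and keep only the words that are not stopwords
stop_words = set(stopwords.words('english'))
words = word_tokenize(text)
filtered_words = [word for word in words if word.lower() not in stop_words]
print(filtered_words)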
Program 12
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
user_data = pd.read_csv('user_data.csv')
X = user_data[['age']] # independent variable
y = user_data['income'] # dependent variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
simple_lr = LinearRegression()
simple_lr.fit(X_train, y_train)
y_pred = simple_lr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
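These imports also appear without a body; the short sketch below tokenizes and scores a couple of placeholder sentences with VADER and collects the results in a DataFrame (the sentences themselves are illustrative).
nltk.download('punkt')
nltk.download('vader_lexicon')
# Placeholder sentences for illustration
sentences = [
    "The product quality is excellent and delivery was fast.",
    "The packaging was damaged and support never replied.",
]
sia = SentimentIntensityAnalyzer()
rows = []
for sentence in sentences:
    tokens = word_tokenize(sentence)        # word-level tokens
    scores = sia.polarity_scores(sentence)  # neg/neu/pos/compound scores
    rows.append({'sentence': sentence, 'tokens': len(tokens), 'compound': scores['compound']})
print(pd.DataFrame(rows))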
Program 15
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
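The split in the next lines uses X and y that are never loaded in the fragment; under the assumption of a hypothetical salary_data.csv with 'YearsExperience' and 'Salary' columns (both names are placeholders), they could be prepared as follows.
# Load the data (file and column names are assumptions)
data = pd.read_csv('salary_data.csv')
X = data[['YearsExperience']]  # independent variable
y = data['Salary']             # dependent variable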
# Split the dataset into training and testing sets with a 70:30 ratio
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
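With the split in place, the remaining steps of Program 15 would fit the model and report its error; this is a sketch, not the original author's code.
# Fit a simple linear regression model on the training set
model = LinearRegression()
model.fit(X_train, y_train)
# Predict on the test set and report the mean squared error
y_pred = model.predict(X_test)
print('Mean Squared Error:', mean_squared_error(y_test, y_pred))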
Program 16
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
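As in Program 15, X and y are not defined in the fragment; a hedged preparation step is sketched below, assuming a hypothetical user_data.csv with 'Age', 'EstimatedSalary' and 'Purchased' columns (the names are placeholders).
# Load the data (file and column names are assumptions)
data = pd.read_csv('user_data.csv')
X = data[['Age', 'EstimatedSalary']]  # features
y = data['Purchased']                 # target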
# Split the dataset into training and testing sets with a 70:30 ratio
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Create a logistic regression object
logreg = LogisticRegression()
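The fragment stops after creating the estimator; the remaining steps would be to fit it, predict on the test set and compute the accuracy, as sketched below.
# Fit the model on the training data
logreg.fit(X_train, y_train)
# Predict on the test data and report accuracy
y_pred = logreg.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))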
Program 17