DP prog
DP prog
import numpy as np
def main():
    """Demonstrate basic NumPy array arithmetic and linear-algebra routines.

    Prints two 2x3 arrays together with their element-wise sum, difference,
    product and quotient, then prints two 2x2 matrices, their matrix product,
    and the determinant and inverse of the first matrix.
    """
    # Creating NumPy arrays
    array1 = np.array([[1, 2, 3], [4, 5, 6]])
    array2 = np.array([[7, 8, 9], [10, 11, 12]])
    print("Array 1:")
    print(array1)
    print("\nArray 2:")
    print(array2)

    # Element-wise operations
    print("\nElement-wise operations:")
    print("Addition: ")
    print(array1 + array2)
    print("Subtraction: ")
    print(array1 - array2)
    print("Multiplication: ")
    print(array1 * array2)
    print("Division: ")
    print(array1 / array2)

    # Matrix operations
    # A and B were referenced without ever being defined, which raised a
    # NameError. Square, non-singular sample matrices are used so that the
    # product, determinant and inverse below are all well defined.
    A = np.array([[1, 2], [3, 4]])
    B = np.array([[5, 6], [7, 8]])
    print("Matrix A:")
    print(A)
    print("\nMatrix B:")
    print(B)
    print("\nMatrix multiplication AB: ")
    print(np.dot(A, B))
    print("\nMatrix determinant of A:")
    print(np.linalg.det(A))
    print("\nMatrix inverse of A:")
    print(np.linalg.inv(A))


if __name__ == "__main__":
    main()
import pandas as pd
import matplotlib.pyplot as plt
# NOTE(review): `df` is not defined in the visible part of this listing. The
# code below reads its 'Age', 'Name' and 'score' columns, so a DataFrame with
# those columns must be created before this point - TODO confirm.
print("DataFrame:")
print(df)

# Plotting Series
# The original line used a garbled identifier and fused three statements
# together; each call now sits on its own line.
plt.plot(df['Age'])
plt.xlabel('Index')
plt.ylabel('Age')
plt.title('Age Distribution')
plt.show()

# Plotting DataFrame
df.plot(kind='bar', x='Name', y='score')
plt.xlabel('Name')
plt.ylabel('Score')
plt.title('Score Distribution')
plt.show()
Laboratory 3
Title of the Laboratory Exercise: Data Exploration
a. Write a Python program to compute various summary statistics from the DataFrame.
Use the Iris sample data, which contains information on 150 Iris flowers, 50 each from one of
three Iris species: Setosa, Versicolour, and Virginica. Each flower is characterized by
five attributes:
• sepal length in centimeters
• sepal width in centimeters
• petal length in centimeters
• petal width in centimeters
• class (Setosa, Versicolour, Virginica)
import pandas as pd
from sklearn.datasets import load_iris
# Load the iris dataset and put the measurements into a DataFrame, one column
# per feature, plus a 'class' column holding the numeric target label.
iris = load_iris()
# iris = pd.read_csv("iris.csv")
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['class'] = iris.target

print("Data overview")
print(df.head())
print("Summary stats")
print(df.describe())
# groupby('class').size() yields the number of rows per class label.
print("counts of each column")
print(df.groupby('class').size())
print("correlation matrix")
print(df.corr())
print(df.tail())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() emitted a stray "None" line.
df.info()
print(df.dtypes)
# "\n" (not "/n") is the newline escape; the original printed a literal "/n".
print("\n Shape:", df.shape)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
# NOTE(review): `y_test`, `y_pred` and `iris` are not defined in the visible
# part of this listing; the steps that load the data, split it and fit the
# classifier appear to be missing between the imports and this point - TODO
# confirm against the full program.

# Evaluate accuracy: compare the held-out labels with the predictions and
# print the score with two decimal places.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
# Classification report, with rows labelled by the names in iris.target_names
print(classification_report(y_test, y_pred, target_names=iris.target_names))
# Visualizing the Iris dataset
# Only the 8x6 figure is opened here; the plotting calls that should follow
# are not present in the visible listing.
plt.figure(figsize=(8, 6))
Laboratory 4
Title of the Laboratory Exercise: Data Preprocessing
import pandas as pd
import numpy as np
# Create a sample DataFrame with missing values, outliers, and duplicate data
data = {
    'A': [1, 2, np.nan, 4, 5, 100],  # Missing value and outlier (100)
    'B': [10, np.nan, 30, 40, 50, 10],  # Missing value and duplicate (10)
    'C': ['x', 'Y', 'Z', np.nan, 'w', 'x'],
    'D': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
}
df = pd.DataFrame(data)

# NOTE(review): `df_filled` and `winsorize_series` are not defined in the
# visible part of this listing; the earlier steps that produce them appear to
# be missing between the DataFrame creation and this point - TODO restore
# them from the full program.

# Handling Outliers
print("Handling Outliers:")
df_outliers_handled = df_filled.copy()
# Iterate through each float64 column of the data and apply the winsorize
# function to it. (This comment previously wrapped onto a line of its own,
# leaving a bare `function` token in the code.)
for col in df_outliers_handled.select_dtypes(include=['float64']).columns:
    df_outliers_handled[col] = winsorize_series(df_outliers_handled[col])

# Drop duplicates
df_no_duplicates = df_outliers_handled.drop_duplicates()
print("4. Dropping duplicates:")
print(df_no_duplicates)
print()
b. Write a Python program to implement Principal Component Analysis (PCA) for image data.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_lfw_people
# Compute PCA
# NOTE(review): `X` is not defined in the visible part of this listing - TODO
# confirm where the data matrix is built.
n_components = 150  # Number of principal components
# Configure the estimator first, then fit it; fit() returns the estimator
# itself, so `pca` is the same object the chained form would have produced.
pca = PCA(
    n_components=n_components,
    svd_solver='randomized',
    whiten=True,
)
pca.fit(X)
import numpy as np
import matplotlib.pyplot as plt
# Fix the seed so the generated data is reproducible.
np.random.seed(0)
# X is a 2D array of shape (100, 1). Multiplying each of these random numbers
# by 2 scales them to be within the range [0, 2).
X = 2 * np.random.rand(100, 1)
# Linear relationship y = 4 + 3x plus Gaussian noise (not a quadratic one).
y = 4 + 3 * X + np.random.randn(100, 1)

# NOTE(review): the import of `LinearRegression` and the step that creates
# `X_train`, `X_test` and `y_train` are not present in the visible part of
# this listing - TODO restore them from the full program.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"Coefficients: {model.coef_}")
print(f"Intercept: {model.intercept_}")
plt.xlabel("X")
plt.ylabel('y')
plt.show()
Laboratory 6
Title of the Laboratory Exercise: Classifications
a. Write a Python program to apply a decision tree classifier to the vertebrate dataset.
# NOTE(review): `vertebral_data`, `accuracy`, `label_encoder`, `y_test` and
# `y_pred_original` are not defined in the visible part of this listing; the
# loading, encoding, splitting and training steps appear to be missing - TODO
# confirm against the full program.

# Preprocess the data (assuming the last column is the target variable)
X = vertebral_data.iloc[:, :-1] # Features: every column except the last
y = vertebral_data.iloc[:, -1] # Target variable: the last column
print("Accuracy:", accuracy)
print("\nClassification Report:")
# The test labels are passed through label_encoder.inverse_transform before
# being compared with y_pred_original; rows are named after
# label_encoder.classes_.
print(classification_report(label_encoder.inverse_transform(y_test),
y_pred_original, target_names=label_encoder.classes_))
Laboratory 7
Title of the Laboratory Exercise: Cluster Analysis
a. Write a Python program to implement any two clustering algorithms using Python.
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): `kmeans`, `X`, `Z` and `dendrogram` are not defined in the
# visible part of this listing; the data generation, model fitting and
# linkage steps appear to be missing - TODO confirm against the full program.

# Labels and cluster centres read from the `kmeans` object.
kmeans_labels = kmeans.labels_
kmeans_centers = kmeans.cluster_centers_
# One wide figure holding two side-by-side panels.
plt.figure(figsize=(12, 6))
# Left panel: the first two columns of X, coloured by K-Means label, with the
# centres overlaid as large red crosses.
plt.subplot(121)
plt.scatter(X[:, 0], X[:, 1], c=kmeans_labels, cmap='viridis')
plt.scatter(kmeans_centers[:, 0], kmeans_centers[:, 1], marker='x', color='red',
s=200, label='Centers')
plt.title('K-Means Clustering')
plt.legend()
# Right panel: dendrogram drawn from Z.
plt.subplot(122)
plt.title('Hierarchical Clustering Dendrogram')
dendrogram(Z)
plt.xlabel("Sample Index")
# Adjust spacing so the two panels do not overlap, then display the figure.
plt.tight_layout()
plt.show()
Laboratory 8
Title of the Laboratory Exercise: Anomaly Detection
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
# NOTE(review): `model` and `data` are not defined in the visible part of this
# listing - presumably a fitted IsolationForest and a DataFrame with
# 'value1' and 'value2' columns; TODO confirm.

# Predict anomalies
# A prediction of -1 is recorded as 1 (anomaly); anything else as 0.
# The flags are assigned as a plain array so they are matched to rows by
# position. The previous pd.Series wrapper carried a fresh 0..n-1 index, and
# column assignment aligns on index labels, so a `data` frame with any other
# index would silently receive NaN or misplaced flags.
data['anomaly'] = np.where(
    np.asarray(model.predict(data[["value1", "value2"]])) == -1, 1, 0
)

# Print anomalies
anomalies = data[data['anomaly'] == 1]
print("Detected anomalies:")
print(anomalies)
import matplotlib.pyplot as plt
# Plotting (optional)
# Draw both value series against the timestamp, then overlay the rows flagged
# as anomalies as red markers on each series.
plt.figure(figsize=(12, 6))

for _column, _legend_text in (('value1', 'Value 1'), ('value2', 'Value 2')):
    plt.plot(data['timestamp'], data[_column], label=_legend_text)

# Only the first scatter carries a legend label, so 'Anomaly' is listed once.
for _column, _extra in (('value1', {'label': 'Anomaly'}), ('value2', {})):
    plt.scatter(anomalies['timestamp'], anomalies[_column], color='red', **_extra)

plt.title('Anomaly Detection in Multivariate Time Series')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()