Code Examples

The document provides a comprehensive overview of various machine learning techniques, including supervised learning with the Iris dataset, unsupervised learning with KMeans clustering, ensemble methods such as Random Forest and AdaBoost, model evaluation metrics, neural networks for classification and regression, and time series forecasting with ARIMA. It includes code examples implementing these techniques with Python's scikit-learn library, with visualizations built in matplotlib and seaborn. Each section highlights key concepts and methods for analyzing and predicting data.


SUPERVISED LEARNING EXAMPLE

from sklearn.datasets import load_iris

# Load Iris data
iris = load_iris()
X = iris.data
y = iris.target

# Feature names and target classes
print("Feature names:", iris.feature_names)
print("Target names:", iris.target_names)

# Sample data
print("First 5 samples:\n", X[:5])
print("First 5 targets:\n", y[:5])
import pandas as pd

df = pd.DataFrame(X, columns=iris.feature_names)
df['species'] = [iris.target_names[i] for i in y]

# Display first few rows
print(df.head())

from sklearn.model_selection import train_test_split

# 80% train, 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n",
      classification_report(y_test, y_pred, target_names=iris.target_names))

X_reduced = X[:, [0, 2, 3]]  # drop 'sepal width (cm)' (feature index 1)

X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X_reduced, y, test_size=0.2,
                                                             random_state=42)

scaler_r = StandardScaler()
X_train_r_scaled = scaler_r.fit_transform(X_train_r)
X_test_r_scaled = scaler_r.transform(X_test_r)

model_r = DecisionTreeClassifier()
model_r.fit(X_train_r_scaled, y_train_r)
y_pred_r = model_r.predict(X_test_r_scaled)

print("Accuracy without 'sepal width':", accuracy_score(y_test_r, y_pred_r))

# Train on the raw (unscaled) features for comparison
model_ns = DecisionTreeClassifier()
model_ns.fit(X_train, y_train)
y_pred_ns = model_ns.predict(X_test)

print("Accuracy without scaling:", accuracy_score(y_test, y_pred_ns))

import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, cmap='Blues', xticklabels=iris.target_names,
            yticklabels=iris.target_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

UNSUPERVISED LEARNING EXAMPLE

# Step 1: Load libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Step 2: Load the Iris dataset (without using target labels)
iris = load_iris()
X = iris.data
feature_names = iris.feature_names
df = pd.DataFrame(X, columns=feature_names)

# Step 3: Use KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=42)
df['cluster'] = kmeans.fit_predict(X)

# Step 4: Reduce dimensions for visualization using PCA
pca = PCA(n_components=2)
components = pca.fit_transform(X)
df['PC1'] = components[:, 0]
df['PC2'] = components[:, 1]

# Step 5: Plot the clusters
sns.scatterplot(data=df, x='PC1', y='PC2', hue='cluster', palette='Set1')
plt.title("K-Means Clustering (Unsupervised)")
plt.show()

# Optional: Add actual labels for visual comparison
df['actual'] = iris.target

# Compare real species and predicted clusters
sns.scatterplot(data=df, x='PC1', y='PC2', hue='actual', palette='Set2')
plt.title("Actual Iris Species")
plt.show()

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Step 2: Load the Iris dataset
iris = load_iris()
X = iris.data
feature_names = iris.feature_names
df = pd.DataFrame(X, columns=feature_names)

def add_pca(df, X):
    pca = PCA(n_components=2)
    components = pca.fit_transform(X)
    df['PC1'] = components[:, 0]
    df['PC2'] = components[:, 1]
    return df

for k in [2, 3, 4, 5]:
    kmeans = KMeans(n_clusters=k, random_state=42)
    df_k = df.copy()
    df_k['cluster'] = kmeans.fit_predict(X)
    df_k = add_pca(df_k, X)

    plt.figure()
    sns.scatterplot(data=df_k, x='PC1', y='PC2', hue='cluster', palette='Set1')
    plt.title(f"KMeans Clustering with k={k}")
    plt.show()
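
Eyeballing the scatterplots across k values is subjective. A quantitative alternative, sketched here with silhouette_score (an addition, not used in the original), scores each clustering by how well-separated its clusters are:

from sklearn.metrics import silhouette_score

# Higher silhouette scores indicate tighter, better-separated clusters
for k in [2, 3, 4, 5]:
    labels = KMeans(n_clusters=k, random_state=42).fit_predict(X)
    print(f"k={k}: silhouette = {silhouette_score(X, labels):.3f}")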

agg = AgglomerativeClustering(n_clusters=3)
df_agg = df.copy()
df_agg['cluster'] = agg.fit_predict(X)
df_agg = add_pca(df_agg, X)

plt.figure()
sns.scatterplot(data=df_agg, x='PC1', y='PC2', hue='cluster', palette='Set2')
plt.title("Agglomerative Clustering")
plt.show()

# Standardize data first (important for DBSCAN)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

dbscan = DBSCAN(eps=0.8, min_samples=5)  # points labeled -1 are treated as noise
df_db = df.copy()
df_db['cluster'] = dbscan.fit_predict(X_scaled)
df_db = add_pca(df_db, X_scaled)

plt.figure()
sns.scatterplot(data=df_db, x='PC1', y='PC2', hue='cluster', palette='Set3')
plt.title("DBSCAN Clustering")
plt.show()

X_petal = df[['petal length (cm)', 'petal width (cm)']].values

kmeans_petal = KMeans(n_clusters=3, random_state=42)
df_petal = df.copy()
df_petal['cluster'] = kmeans_petal.fit_predict(X_petal)
df_petal = add_pca(df_petal, X_petal)

plt.figure()
sns.scatterplot(data=df_petal, x='PC1', y='PC2', hue='cluster', palette='coolwarm')
plt.title("KMeans on Petal Features Only")
plt.show()

df_labels = add_pca(df.copy(), X)
df_labels['label'] = iris.target

plt.figure()
sns.scatterplot(data=df_labels, x='PC1', y='PC2', hue='label', palette='Dark2')
plt.title("Actual Iris Species Labels")
plt.show()

ENSEMBLE METHODS

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier,
                              GradientBoostingClassifier, VotingClassifier)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

print("🌳 Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

ada = AdaBoostClassifier(n_estimators=50, random_state=42)
ada.fit(X_train, y_train)
y_pred_ada = ada.predict(X_test)

print("⚡ AdaBoost Accuracy:", accuracy_score(y_test, y_pred_ada))

gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb.fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)

print("📈 Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred_gb))

log_clf = LogisticRegression(max_iter=200)
svc_clf = SVC(probability=True)

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rf), ('svc', svc_clf)],
    voting='hard'  # can also be 'soft'
)

voting_clf.fit(X_train, y_train)
y_pred_vote = voting_clf.predict(X_test)

print(" Voting Classifier Accuracy:", accuracy_score(y_test, y_pred_vote))


MODEL EVALUATION

from sklearn.datasets import load_iris, load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import (accuracy_score, confusion_matrix,
                             classification_report, roc_curve, auc)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Binary classification for ROC demo
y_binary = (y == 2).astype(int) # classify class 2 vs others

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.3,
                                                    random_state=42)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print("✅ Accuracy Score:", acc)

cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

print("📊 Classification Report:")


print(classification_report(y_test, y_pred))

y_proba = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.show()

# Load diabetes dataset (regression)
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)

# Train a linear regression model
reg = LinearRegression()
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"📈 MAE: {mae:.2f}")


print(f"📉 MSE: {mse:.2f}")
print(f"📏 RMSE: {rmse:.2f}")
print(f"🔢 R² Score: {r2:.2f}")

NEURAL NETWORK

# Example 1: Simple Classification (Digits Dataset)

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Create the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(32,), max_iter=300, random_state=1)
mlp.fit(X_train, y_train)

# Predict and evaluate
y_pred = mlp.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# ✅ Example 2: The XOR Problem (Two-Input Logic Problem)

import numpy as np
from sklearn.neural_network import MLPClassifier

# XOR data
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([0, 1, 1, 0])

# Model
model = MLPClassifier(hidden_layer_sizes=(4,), activation='tanh',
                      solver='lbfgs', max_iter=1000, random_state=0)  # lbfgs converges more reliably on tiny datasets
model.fit(X, y)

# Predict
print("Predictions:", model.predict(X))
# ✅ Example 3: Single-Input Regression (the sin function)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

# Generate sin(x) data
X = np.linspace(0, 2*np.pi, 100).reshape(-1, 1)
y = np.sin(X).ravel()

# MLP Regressor
mlp = MLPRegressor(hidden_layer_sizes=(10,10), max_iter=1000)
mlp.fit(X, y)

# Tahmin & Görselleştirme


y_pred = mlp.predict(X)
plt.plot(X, y, label='Actual')
plt.plot(X, y_pred, label='Prediction')
plt.legend()
plt.title("sin(x) regression")
plt.show()
# ✅ Example 4: Multi-Layer MLP – Wine Dataset

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Dataset
data = load_wine()
X, y = data.data, data.target

# Standardization
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3,
                                                    random_state=0)

# MLP model
clf = MLPClassifier(hidden_layer_sizes=(50,), max_iter=500)
clf.fit(X_train, y_train)

# Results
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Example 5: A simple MLP for the AND problem (sigmoid activation, manual weight updates)
import numpy as np

# Sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))

# Input data for the AND problem
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # 4 samples
y = np.array([[0], [0], [0], [1]])              # target outputs

# Initialize weights and bias randomly
np.random.seed(0)
weights = np.random.rand(2, 1)  # 2 inputs -> 1 output
bias = np.random.rand(1)

# Training parameters
lr = 0.1        # learning rate
epochs = 10000  # number of passes over the data

# Training loop
for epoch in range(epochs):
    # 1. Forward propagation
    z = np.dot(X, weights) + bias
    output = sigmoid(z)

    # 2. Compute the error
    error = y - output

    # 3. Backpropagation
    d_weights = np.dot(X.T, error * sigmoid_deriv(z))
    d_bias = np.sum(error * sigmoid_deriv(z))

    # 4. Update weights and bias
    weights += lr * d_weights
    bias += lr * d_bias

    # Print progress (optional)
    if epoch % 2000 == 0:
        loss = np.mean(np.abs(error))
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Print the results
print("\nResults:")
for i in range(len(X)):
    result = sigmoid(np.dot(X[i], weights) + bias)
    print(f"input: {X[i]}, prediction: {result[0]:.4f}")

TIME SERIES AND FORECASTING

# Example 1: ARIMA Forecasting with Airline Dataset

import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.datasets import get_rdataset

# Load dataset
data = get_rdataset('AirPassengers').data
data['time'] = pd.date_range(start='1949-01', periods=len(data), freq='M')
data.set_index('time', inplace=True)

# Plot
data.plot(title='Monthly Air Passengers')
plt.ylabel('Passengers')
plt.show()
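
The d=1 in the ARIMA order below corresponds to differencing the series once. An augmented Dickey-Fuller test, sketched here with statsmodels' adfuller (an addition, not in the original), is the usual check for whether differencing is needed:

from statsmodels.tsa.stattools import adfuller

# A p-value above 0.05 suggests the series is non-stationary and needs differencing
adf_stat, p_value = adfuller(data['value'])[:2]
print(f"ADF statistic: {adf_stat:.3f}, p-value: {p_value:.3f}")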

# Fit ARIMA Model
model = ARIMA(data['value'], order=(2, 1, 2)) # ARIMA(p,d,q)
model_fit = model.fit()
print(model_fit.summary())

# Forecast
forecast = model_fit.forecast(steps=12)
forecast.plot(title="12-Month Forecast")
plt.show()
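
The 12-step forecast above extends beyond the observed data, so there is nothing to compare it against. A minimal holdout sketch (an addition: refit on all but the last year, then score against the held-out actuals) makes the fit measurable:

import numpy as np

# Hold out the final 12 months, refit, and compare the forecast to actuals
train, test = data['value'][:-12], data['value'][-12:]
holdout_fit = ARIMA(train, order=(2, 1, 2)).fit()
holdout_pred = holdout_fit.forecast(steps=12)
rmse = np.sqrt(np.mean((holdout_pred.values - test.values) ** 2))
print(f"Holdout RMSE: {rmse:.2f}")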

# Example 2: Forecasting with Prophet

!pip install prophet

import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt

# Load dataset
df = pd.read_csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/
airline-passengers.csv")
df.columns = ['ds', 'y']

# Fit model
model = Prophet()
model.fit(df)

# Forecast next 12 months
future = model.make_future_dataframe(periods=12, freq='M')
forecast = model.predict(future)

# Plot
model.plot(forecast)
plt.title('Forecast with Prophet')
plt.show()
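
Beyond the raw forecast plot, Prophet can decompose its forecast into trend and seasonal components, which is often more informative. This sketch uses Prophet's standard plot_components method (not shown in the original):

# Decompose the forecast into trend and yearly seasonality
model.plot_components(forecast)
plt.show()
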
# Example 3: Machine Learning Approach (Random Forest)

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-
passengers.csv"
df = pd.read_csv(url)
df.columns = ['Month', 'Passengers']
df['Month'] = pd.to_datetime(df['Month'])
df.set_index('Month', inplace=True)

# Create lag features: lag_k holds the passenger count from k months earlier
for lag in range(1, 13):
    df[f'lag_{lag}'] = df['Passengers'].shift(lag)
df.dropna(inplace=True)

# Split data
train = df.iloc[:-12]
test = df.iloc[-12:]

X_train = train.drop('Passengers', axis=1)
y_train = train['Passengers']
X_test = test.drop('Passengers', axis=1)
y_test = test['Passengers']

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict
preds = model.predict(X_test)

# Plot
plt.plot(y_test.index, y_test, label='Actual')
plt.plot(y_test.index, preds, label='Predicted')
plt.legend()
plt.title('Random Forest Forecasting')
plt.show()

# RMSE (computed via np.sqrt; the squared=False argument was removed in recent scikit-learn)
print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))
