History of Code

1__________________________________________________________________

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import matplotlib.pyplot as plt
import ccxt
import time

# Connect to the exchange
exchange = ccxt.bybit()

def fetch_and_preprocess_data_with_target(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data (note: most exchanges cap the number of candles
            # per request, so fewer rows than `limit` may come back)
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            print('Network error:', e)
            print(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            time.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            print('Exchange error:', e)
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            print('Error:', e)
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    print("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Fetch and preprocess data
symbol = 'DOGE/USDT'
timeframe = '1m'
limit = 10000
data = fetch_and_preprocess_data_with_target(symbol, timeframe, limit)

# Check whether data is empty
if not data.empty:
    # Split the data into features and target
    X = data.drop(['timestamp', 'target'], axis=1)
    y = data['target']

    # Perform feature engineering if needed

    # Hyperparameter tuning using GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 150],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.05, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0]
    }

    # Time series cross-validation
    tscv = TimeSeriesSplit(n_splits=5)

    # Initialize the XGBoost regressor
    xgb_model = xgb.XGBRegressor()

    # Grid search
    grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                               cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X, y)

    # Best parameters
    best_params = grid_search.best_params_
    print("Best parameters:", best_params)

    # Best estimator
    best_estimator = grid_search.best_estimator_

    # Cross-validation results
    cv_results = grid_search.cv_results_
    print("Cross-validation results:", cv_results)

    # Evaluate the best model (in-sample, on the same data it was tuned on)
    predictions = best_estimator.predict(X)
    mse = mean_squared_error(y, predictions)
    print("Mean Squared Error:", mse)

    # Calculate directional accuracy
    threshold = 0  # Threshold separating positive from negative predictions
    binary_predictions = np.where(predictions > threshold, 1, 0)
    accuracy = accuracy_score(y > threshold, binary_predictions)
    print("Accuracy:", accuracy)

    # Plot actual vs. predicted
    plt.plot(data['timestamp'], y, label='Actual')
    plt.plot(data['timestamp'], predictions, label='Predicted')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

    # Strategy testing
    # Generate signals (example: buy if the predicted return > 0, sell otherwise)
    signals = np.where(predictions > 0, 1, -1)

    # Calculate returns (shift the target by one step so each return lines up
    # with the signal that preceded it)
    returns = signals * data['target'].shift(-1)
    cumulative_returns = returns.cumsum()

    # Plot cumulative returns
    plt.plot(data['timestamp'], cumulative_returns, label='Cumulative Returns')
    plt.xlabel('Time')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.show()
else:
    print("Data fetching failed. Exiting...")

2__________________________________________________________________

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import matplotlib.pyplot as plt
import ccxt
import time
import joblib
import json
import os

# Connect to the exchange
exchange = ccxt.bybit()

def fetch_and_preprocess_data_with_target(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            print('Network error:', e)
            print(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            time.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            print('Exchange error:', e)
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            print('Error:', e)
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    print("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Fetch and preprocess data
symbol = 'DOGE/USDT'  # Example cryptocurrency symbol
timeframe = '1m'
limit = 10000
data = fetch_and_preprocess_data_with_target(symbol, timeframe, limit)

# Check whether data is empty
if not data.empty:
    # Split the data into features and target
    X = data.drop(['timestamp', 'target'], axis=1)
    y = data['target']

    # Perform feature engineering if needed

    # Hyperparameter tuning using GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 150],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.05, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0]
    }

    # Time series cross-validation
    tscv = TimeSeriesSplit(n_splits=5)

    # Initialize the XGBoost regressor
    xgb_model = xgb.XGBRegressor()

    # Grid search
    grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                               cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X, y)

    # Best parameters
    best_params = grid_search.best_params_
    print("Best parameters:", best_params)

    # Best estimator
    best_estimator = grid_search.best_estimator_

    # Cross-validation results
    cv_results = grid_search.cv_results_
    print("Cross-validation results:", cv_results)

    # Evaluate the best model (in-sample)
    predictions = best_estimator.predict(X)
    mse = mean_squared_error(y, predictions)
    print("Mean Squared Error:", mse)

    # Calculate directional accuracy
    threshold = 0  # Threshold separating positive from negative predictions
    binary_predictions = np.where(predictions > threshold, 1, 0)
    accuracy = accuracy_score(y > threshold, binary_predictions)
    print("Accuracy:", accuracy)

    # Plot actual vs. predicted
    plt.plot(data['timestamp'], y, label='Actual')
    plt.plot(data['timestamp'], predictions, label='Predicted')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

    # Strategy testing
    # Generate signals (example: buy if the predicted return > 0, sell otherwise)
    signals = np.where(predictions > 0, 1, -1)

    # Calculate returns (shift the target by one step so returns align with signals)
    returns = signals * data['target'].shift(-1)
    cumulative_returns = returns.cumsum()

    # Plot cumulative returns
    plt.plot(data['timestamp'], cumulative_returns, label='Cumulative Returns')
    plt.xlabel('Time')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.show()

    # Save the trained model under the cryptocurrency's name
    model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
    joblib.dump(best_estimator, model_filename)

    # Log metadata with the cryptocurrency's name
    metadata = {
        'crypto_symbol': symbol,
        'model_filename': model_filename,
        'best_params': best_params,
        'mse': float(mse),            # cast numpy scalars so the json module can serialize them
        'accuracy': float(accuracy),
        'code_version': '1.0',        # Example code version
        'timestamp': str(pd.Timestamp.now())  # Current timestamp
    }

    # Save the metadata to a JSON file with the cryptocurrency's name
    metadata_filename = f'model_metadata_{symbol.replace("/", "_")}.json'
    with open(metadata_filename, 'w') as f:
        json.dump(metadata, f)

    print("Model saved as:", model_filename)
    print("Metadata saved as:", metadata_filename)

else:
    print("Data fetching failed. Exiting...")

3__________________________________________________________________

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import matplotlib.pyplot as plt
import ccxt
import time
import joblib
import json
import os

def get_kraken_pairs():
    kraken = ccxt.kraken()
    markets = kraken.load_markets()
    kraken_pairs = list(markets.keys())
    return kraken_pairs

def fetch_and_preprocess_data_with_target(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            print('Network error:', e)
            print(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            time.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            print('Exchange error:', e)
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            print('Error:', e)
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    print("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Connect to the exchange
# (Note: pairs are listed from Kraken below, yet OHLCV is fetched from Bybit here;
# a later revision moves the exchange choice inside the fetch function)
exchange = ccxt.bybit()

# Fetch and preprocess data
kraken_pairs = get_kraken_pairs()

# Filter pairs quoted in USDC
usdc_pairs = [pair for pair in kraken_pairs if '/USDC' in pair]

for symbol in usdc_pairs:
    # Fetch and preprocess data
    symbol_data = fetch_and_preprocess_data_with_target(symbol, '1m', 10000)

    if not symbol_data.empty:
        # Split the data into features and target
        X = symbol_data.drop(['timestamp', 'target'], axis=1)
        y = symbol_data['target']

        # Hyperparameter tuning using GridSearchCV
        param_grid = {
            'n_estimators': [50, 100, 150],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.05, 0.1, 0.2],
            'subsample': [0.8, 0.9, 1.0],
            'colsample_bytree': [0.8, 0.9, 1.0]
        }

        # Time series cross-validation
        tscv = TimeSeriesSplit(n_splits=5)

        # Initialize the XGBoost regressor
        xgb_model = xgb.XGBRegressor()

        # Grid search
        grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                                   cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X, y)

        # Best parameters
        best_params = grid_search.best_params_

        # Best estimator
        best_estimator = grid_search.best_estimator_

        # Evaluate the best model (in-sample)
        predictions = best_estimator.predict(X)
        mse = mean_squared_error(y, predictions)

        # Calculate directional accuracy
        threshold = 0  # Threshold separating positive from negative predictions
        binary_predictions = np.where(predictions > threshold, 1, 0)
        accuracy = accuracy_score(y > threshold, binary_predictions)

        # Save the trained model under the cryptocurrency's name
        model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
        joblib.dump(best_estimator, model_filename)

        # Log metadata with the cryptocurrency's name
        metadata = {
            'crypto_symbol': symbol,
            'model_filename': model_filename,
            'best_params': best_params,
            'mse': float(mse),            # cast numpy scalars so the json module can serialize them
            'accuracy': float(accuracy),
            'code_version': '1.0',        # Example code version
            'timestamp': str(pd.Timestamp.now())  # Current timestamp
        }

        # Save the metadata to a JSON file with the cryptocurrency's name
        metadata_filename = f'model_metadata_{symbol.replace("/", "_")}.json'
        with open(metadata_filename, 'w') as f:
            json.dump(metadata, f)
        print(f"Model saved as: {model_filename}, Metadata saved as: {metadata_filename}")
    else:
        print(f"Data fetching failed for {symbol}.")

print("All models and metadata saved successfully.")

4__________________________________________________________________

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import matplotlib.pyplot as plt
import ccxt
import time
import joblib
import json
import os

def get_kraken_pairs():
    kraken = ccxt.kraken()
    markets = kraken.load_markets()
    kraken_pairs = list(markets.keys())
    return kraken_pairs

def fetch_and_preprocess_data_with_target(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            print('Network error:', e)
            print(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            time.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            print('Exchange error:', e)
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            print('Error:', e)
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    print("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Connect to the exchange
exchange = ccxt.bybit()

# Fetch and preprocess data
kraken_pairs = get_kraken_pairs()

# Filter pairs quoted in USDC
usdc_pairs = [pair for pair in kraken_pairs if '/USDC' in pair]

for symbol in usdc_pairs:
    # Fetch and preprocess data
    symbol_data = fetch_and_preprocess_data_with_target(symbol, '1m', 10000)

    if not symbol_data.empty:
        # Split the data into features and target
        X = symbol_data.drop(['timestamp', 'target'], axis=1)
        y = symbol_data['target']

        # Hyperparameter tuning using GridSearchCV
        param_grid = {
            'n_estimators': [50, 100, 150],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.05, 0.1, 0.2],
            'subsample': [0.8, 0.9, 1.0],
            'colsample_bytree': [0.8, 0.9, 1.0]
        }

        # Print the data shape and the parameter grid
        # (param_grid must be defined before this print, or it raises a NameError)
        print("Data shape:", X.shape, y.shape)
        print("Parameter grid:", param_grid)

        # Time series cross-validation
        tscv = TimeSeriesSplit(n_splits=5)

        # Initialize the XGBoost regressor
        xgb_model = xgb.XGBRegressor()

        # Grid search
        grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                                   cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X, y)

        # Best parameters
        best_params = grid_search.best_params_

        # Best estimator
        best_estimator = grid_search.best_estimator_

        # Evaluate the best model (in-sample)
        predictions = best_estimator.predict(X)
        mse = mean_squared_error(y, predictions)

        # Calculate directional accuracy
        threshold = 0  # Threshold separating positive from negative predictions
        binary_predictions = np.where(predictions > threshold, 1, 0)
        accuracy = accuracy_score(y > threshold, binary_predictions)

        # Save the trained model under the cryptocurrency's name
        model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
        joblib.dump(best_estimator, model_filename)

        # Log metadata with the cryptocurrency's name
        metadata = {
            'crypto_symbol': symbol,
            'model_filename': model_filename,
            'best_params': best_params,
            'mse': float(mse),            # cast numpy scalars so the json module can serialize them
            'accuracy': float(accuracy),
            'code_version': '1.0',        # Example code version
            'timestamp': str(pd.Timestamp.now())  # Current timestamp
        }

        # Save the metadata to a JSON file with the cryptocurrency's name
        metadata_filename = f'model_metadata_{symbol.replace("/", "_")}.json'
        with open(metadata_filename, 'w') as f:
            json.dump(metadata, f)

        print(f"Model saved as: {model_filename}, Metadata saved as: {metadata_filename}")
    else:
        print(f"Data fetching failed for {symbol}.")

print("All models and metadata saved successfully.")

5__________________________________________________________________

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import matplotlib.pyplot as plt
import ccxt
import time
import joblib
import json
import os

def get_kraken_pairs():
    kraken = ccxt.kraken()
    markets = kraken.load_markets()
    kraken_pairs = list(markets.keys())
    return kraken_pairs

def fetch_and_preprocess_data_with_target(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    exchange = ccxt.kraken()  # Moved the exchange instantiation inside the function
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            print('Network error:', e)
            print(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            time.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            print('Exchange error:', e)
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            print('Error:', e)
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    print("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

def train_and_save_model(symbol, limit=50000):
    # Fetch and preprocess data with a limit of 50000 data points
    data = fetch_and_preprocess_data_with_target(symbol, '1m', limit)

    # Check whether data is empty
    if not data.empty:
        # Check for missing values in the target variable y
        missing_values = data['target'].isnull().sum()
        if missing_values > 0:
            print(f"Found {missing_values} missing values in the target variable y. Removing corresponding rows...")
            data.dropna(subset=['target'], inplace=True)

        # Split the data into features and target
        X = data.drop(['timestamp', 'target'], axis=1)
        y = data['target']

        # Perform feature engineering if needed

        # Hyperparameter tuning using GridSearchCV
        param_grid = {
            'n_estimators': [50, 100, 150],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.05, 0.1, 0.2],
            'subsample': [0.8, 0.9, 1.0],
            'colsample_bytree': [0.8, 0.9, 1.0]
        }

        # Time series cross-validation
        tscv = TimeSeriesSplit(n_splits=5)

        # Initialize the XGBoost regressor
        xgb_model = xgb.XGBRegressor()

        # Grid search
        grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                                   cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X, y)

        # Best parameters
        best_params = grid_search.best_params_

        # Best estimator
        best_estimator = grid_search.best_estimator_

        # Evaluate the best model (in-sample)
        predictions = best_estimator.predict(X)
        mse = mean_squared_error(y, predictions)

        # Calculate directional accuracy
        threshold = 0  # Threshold separating positive from negative predictions
        binary_predictions = np.where(predictions > threshold, 1, 0)
        accuracy = accuracy_score(y > threshold, binary_predictions)

        # Save the trained model under the cryptocurrency's name
        model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
        joblib.dump(best_estimator, model_filename)

        # Log metadata with the cryptocurrency's name
        metadata = {
            'crypto_symbol': symbol,
            'model_filename': model_filename,
            'best_params': best_params,
            'mse': float(mse),            # cast numpy scalars so the json module can serialize them
            'accuracy': float(accuracy),
            'code_version': '1.0',        # Example code version
            'timestamp': str(pd.Timestamp.now())  # Current timestamp
        }

        # Save the metadata to a JSON file with the cryptocurrency's name
        metadata_filename = f'model_metadata_{symbol.replace("/", "_")}.json'
        with open(metadata_filename, 'w') as f:
            json.dump(metadata, f)

        print(f"Model saved as: {model_filename}, Metadata saved as: {metadata_filename}")
    else:
        print(f"Data fetching failed for {symbol}.")

def delete_trained_data(crypto_pairs):
    for symbol in crypto_pairs:
        # Delete the existing model file
        model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
        if os.path.exists(model_filename):
            os.remove(model_filename)
            print(f"Deleted model file: {model_filename}")

        # Delete the existing metadata file
        metadata_filename = f'model_metadata_{symbol.replace("/", "_")}.json'
        if os.path.exists(metadata_filename):
            os.remove(metadata_filename)
            print(f"Deleted metadata file: {metadata_filename}")

# Cryptocurrency pair list
crypto_pairs = get_kraken_pairs()

# Filter pairs quoted in USDC
usdc_pairs = [pair for pair in crypto_pairs if '/USDC' in pair]

# Delete data that has already been trained and tested on
delete_trained_data(usdc_pairs)

# Train and save models for each cryptocurrency pair
for symbol in usdc_pairs:
    train_and_save_model(symbol)
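
delete_trained_data removes artifacts pair by pair; an equivalent sweep with pathlib globbing, shown as an alternative sketch rather than a change to the script:

from pathlib import Path

# Remove every saved model and metadata file in the working directory
for path in list(Path('.').glob('xgboost_model_*.pkl')) + list(Path('.').glob('model_metadata_*.json')):
    path.unlink()
    print(f"Deleted: {path}")
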
6__________________________________________________________________

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import matplotlib.pyplot as plt
import ccxt
import time
import joblib
import json
import os

def get_kraken_pairs():
    kraken = ccxt.kraken()
    markets = kraken.load_markets()
    kraken_pairs = list(markets.keys())
    return kraken_pairs

def fetch_and_preprocess_data_with_target(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            print('Network error:', e)
            print(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            time.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            print('Exchange error:', e)
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            print('Error:', e)
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    print("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Connect to the exchange
exchange = ccxt.kraken()

def train_and_save_model(symbol, limit=50000):
    # Fetch and preprocess data with a limit of 50000 data points
    data = fetch_and_preprocess_data_with_target(symbol, '1m', limit)

    # Check whether the data is empty or has insufficient samples after removing missing values
    if data.empty or len(data) < 10:  # Adjust the threshold as needed
        print(f"Insufficient data available for training for {symbol}.")
        return

    # Check for missing values in the target variable y
    missing_values = data['target'].isnull().sum()
    if missing_values > 0:
        print(f"Found {missing_values} missing values in the target variable y. Removing corresponding rows...")
        data.dropna(subset=['target'], inplace=True)

    # Split the data into features and target
    X = data.drop(['timestamp', 'target'], axis=1)
    y = data['target']

    # Hyperparameter tuning using GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 150],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.05, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0]
    }

    # Time series cross-validation
    tscv = TimeSeriesSplit(n_splits=5)

    # Initialize the XGBoost regressor
    xgb_model = xgb.XGBRegressor()

    # Grid search
    grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                               cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X, y)

    # Best parameters
    best_params = grid_search.best_params_

    # Best estimator
    best_estimator = grid_search.best_estimator_

    # Evaluate the best model (in-sample)
    predictions = best_estimator.predict(X)
    mse = mean_squared_error(y, predictions)

    # Calculate directional accuracy
    threshold = 0  # Threshold separating positive from negative predictions
    binary_predictions = np.where(predictions > threshold, 1, 0)
    accuracy = accuracy_score(y > threshold, binary_predictions)

    # Save the trained model under the cryptocurrency's name
    model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
    joblib.dump(best_estimator, model_filename)

    # Log metadata with the cryptocurrency's name
    metadata = {
        'crypto_symbol': symbol,
        'model_filename': model_filename,
        'best_params': best_params,
        'mse': float(mse),            # cast numpy scalars so the json module can serialize them
        'accuracy': float(accuracy),
        'code_version': '1.0',        # Example code version
        'timestamp': str(pd.Timestamp.now())  # Current timestamp
    }

    # Save the metadata to a JSON file with the cryptocurrency's name
    metadata_filename = f'model_metadata_{symbol.replace("/", "_")}.json'
    with open(metadata_filename, 'w') as f:
        json.dump(metadata, f)

    print(f"Model saved as: {model_filename}, Metadata saved as: {metadata_filename}")

# Fetch and preprocess data
kraken_pairs = get_kraken_pairs()

# Filter pairs quoted in USDC
usdc_pairs = [pair for pair in kraken_pairs if '/USDC' in pair]

# Train and save models for each cryptocurrency pair
for symbol in usdc_pairs:
    train_and_save_model(symbol)

print("All models and metadata saved successfully.")

7__________________________________________________________________-device

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import ccxt
import time
import os
import joblib
import logging
import asyncio
import tkinter as tk
from tkinter import messagebox
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Set up logging
logging.basicConfig(filename='trading.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Connect to the exchange
exchange = ccxt.kraken()  # Use the Kraken exchange

# Define function to fetch and preprocess data with target
async def fetch_and_preprocess_data_with_target_async(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data (the default ccxt classes are synchronous, so this
            # call blocks; ccxt.async_support provides a truly awaitable client)
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            logging.error(f'Network error: {e}')
            logging.info(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            await asyncio.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            logging.error(f'Exchange error: {e}')
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            logging.error(f'Error: {e}')
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    logging.info("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

# Define function to remove outliers
def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

# Define function to create moving averages
def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

# Define function to normalize numerical features
def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Define function to retrain or load an existing model
def retrain_or_load_model(symbol):
    model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
    if os.path.isfile(model_filename):
        logging.info(f"Model found for {symbol}. Loading...")
        model = joblib.load(model_filename)
        return model
    else:
        logging.info(f"No model found for {symbol}. Training new model...")
        return train_model(symbol)

# Define function to train a model
def train_model(symbol):
    # Fetch and preprocess data
    timeframe = '1m'
    limit = 10000
    data = asyncio.run(fetch_and_preprocess_data_with_target_async(symbol, timeframe, limit))

    if data.empty:
        logging.error("Data fetching failed. Exiting...")
        return None

    # Split the data into features and target
    X = data.drop(['timestamp', 'target'], axis=1)
    y = data['target']

    # Perform hyperparameter tuning using GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 150],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.05, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0]
    }

    # Time series cross-validation
    tscv = TimeSeriesSplit(n_splits=5)

    # Initialize the XGBoost regressor
    xgb_model = xgb.XGBRegressor()

    # Grid search
    grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                               cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X, y)

    # Best estimator
    best_estimator = grid_search.best_estimator_

    # Save the trained model
    joblib.dump(best_estimator, f'xgboost_model_{symbol.replace("/", "_")}.pkl')

    return best_estimator

# Define function to execute the trading strategy
async def execute_real_time_trading(symbol, model):
    while True:
        # Fetch and preprocess recent data (a single candle cannot yield the
        # pct_change target or the moving averages, so fetch a small window)
        data = await fetch_and_preprocess_data_with_target_async(symbol, '1m', 60)
        if not data.empty:
            X = data.drop(['timestamp', 'target'], axis=1)
            predictions = model.predict(X)
            # Implement trading strategy using predictions
            # ...
            await asyncio.sleep(60)  # Wait one minute before fetching new data
        else:
            logging.warning("Data fetching failed. Retrying in 5 seconds...")
            await asyncio.sleep(5)

# Define function to start real-time trading
async def start_real_time_trading(symbol):
    # Retrain or load the existing model
    model = retrain_or_load_model(symbol)

    # If a model exists, start trading
    if model is not None:
        await execute_real_time_trading(symbol, model)
    else:
        logging.error("Model training failed. Exiting...")

# Define the Tkinter interface
class TradingApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Real-Time Trading App")
        self.symbol_label = tk.Label(root, text="Symbol:")
        self.symbol_label.pack()
        self.symbol_entry = tk.Entry(root)
        self.symbol_entry.pack()
        self.start_button = tk.Button(root, text="Start Trading", command=self.start_trading)
        self.start_button.pack()
        self.fig = plt.Figure(figsize=(6, 4), dpi=100)
        self.ax = self.fig.add_subplot(111)
        self.chart_canvas = FigureCanvasTkAgg(self.fig, master=root)
        self.chart_canvas.get_tk_widget().pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True)

    def start_trading(self):
        symbol = self.symbol_entry.get()
        if symbol:
            logging.info(f"Starting real-time trading for {symbol}...")
            self.start_button.config(state="disabled")
            self.symbol_entry.config(state="disabled")
            # Caution: asyncio.run blocks the Tk mainloop for as long as the
            # trading loop runs, so the chart below is only drawn after trading
            # stops; see the threading sketch after this listing for one way out
            asyncio.run(start_real_time_trading(symbol))
            self.plot_chart(symbol)
        else:
            messagebox.showwarning("Warning", "Please enter a symbol.")

    def plot_chart(self, symbol):
        # Fetch historical data for the chart
        data = asyncio.run(fetch_and_preprocess_data_with_target_async(symbol, '1d', 365))
        if not data.empty:
            data['timestamp'] = data['timestamp'].apply(mdates.date2num)
            ohlc = data[['timestamp', 'open', 'high', 'low', 'close']].values
            self.ax.clear()
            candlestick_ohlc(self.ax, ohlc, width=0.6, colorup='g', colordown='r', alpha=0.8)
            self.ax.xaxis_date()
            self.ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            self.ax.set_title(f'{symbol} Candlestick Chart')
            self.ax.set_xlabel('Date')
            self.ax.set_ylabel('Price')
            self.chart_canvas.draw()
        else:
            messagebox.showerror("Error", "Failed to fetch historical data for chart.")

# Create the Tkinter window
root = tk.Tk()
app = TradingApp(root)
root.mainloop()
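
As noted in the comment inside start_trading, asyncio.run blocks Tkinter's mainloop for as long as the trading loop runs. One common workaround (a sketch, not part of the original app) is to run the asyncio loop on a background thread so the UI stays responsive:

import asyncio
import threading

def start_trading_in_background(symbol):
    # Daemon thread: it will not keep the process alive after the Tk window closes
    worker = threading.Thread(
        target=lambda: asyncio.run(start_real_time_trading(symbol)),
        daemon=True,
    )
    worker.start()
    return worker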

7__________________________________________________________________-colab

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score
import ccxt
import time
import os
import joblib
import logging
import asyncio
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mplfinance.original_flavor import candlestick_ohlc

# Set up logging
logging.basicConfig(filename='trading.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Connect to the exchange
exchange = ccxt.kraken()  # Use the Kraken exchange

# Define function to fetch and preprocess data with target
async def fetch_and_preprocess_data_with_target_async(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data (the default ccxt classes are synchronous, so this
            # call blocks; ccxt.async_support provides a truly awaitable client)
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)

            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

            # Handle missing values
            df.dropna(inplace=True)

            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100

            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)

            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')

            # Create 10-period and 50-period moving averages of the 'close' price
            df = create_moving_averages(df, 'close', [10, 50])

            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            logging.error(f'Network error: {e}')
            logging.info(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            await asyncio.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            logging.error(f'Exchange error: {e}')
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            logging.error(f'Error: {e}')
            return pd.DataFrame()  # Return an empty DataFrame for any other error

    logging.info("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

# Define function to remove outliers
def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

# Define function to create moving averages
def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

# Define function to normalize numerical features
def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Define function to retrain or load an existing model
def retrain_or_load_model(symbol):
    model_filename = f'xgboost_model_{symbol.replace("/", "_")}.pkl'
    if os.path.isfile(model_filename):
        logging.info(f"Model found for {symbol}. Loading...")
        model = joblib.load(model_filename)
        return model
    else:
        logging.info(f"No model found for {symbol}. Training new model...")
        return train_model(symbol)

# Define function to train a model
def train_model(symbol):
    # Fetch and preprocess data
    timeframe = '1m'
    limit = 10000
    data = asyncio.run(fetch_and_preprocess_data_with_target_async(symbol, timeframe, limit))

    if data.empty:
        logging.error("Data fetching failed. Exiting...")
        return None

    # Split the data into features and target
    X = data.drop(['timestamp', 'target'], axis=1)
    y = data['target']

    # Perform hyperparameter tuning using GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 150],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.05, 0.1, 0.2],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0]
    }

    # Time series cross-validation
    tscv = TimeSeriesSplit(n_splits=5)

    # Initialize the XGBoost regressor
    xgb_model = xgb.XGBRegressor()

    # Grid search
    grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                               cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X, y)

    # Best estimator
    best_estimator = grid_search.best_estimator_

    # Save the trained model
    joblib.dump(best_estimator, f'xgboost_model_{symbol.replace("/", "_")}.pkl')
    return best_estimator

# Define function to execute the trading strategy
async def execute_real_time_trading(symbol, model):
    while True:
        # Fetch and preprocess recent data (a single candle cannot yield the
        # pct_change target or the moving averages, so fetch a small window)
        data = await fetch_and_preprocess_data_with_target_async(symbol, '1m', 60)
        if not data.empty:
            X = data.drop(['timestamp', 'target'], axis=1)
            predictions = model.predict(X)
            # Implement trading strategy using predictions
            # ...
            await asyncio.sleep(60)  # Wait one minute before fetching new data
        else:
            logging.warning("Data fetching failed. Retrying in 5 seconds...")
            await asyncio.sleep(5)

# Define function to start real-time trading
async def start_real_time_trading(symbol):
    # Retrain or load the existing model
    model = retrain_or_load_model(symbol)

    # If a model exists, start trading
    if model is not None:
        await execute_real_time_trading(symbol, model)
    else:
        logging.error("Model training failed. Exiting...")

# Create a matplotlib figure for the candlestick chart
def plot_candlestick_chart(symbol):
    # Fetch historical data for the chart
    data = asyncio.run(fetch_and_preprocess_data_with_target_async(symbol, '1d', 365))
    if not data.empty:
        data['timestamp'] = data['timestamp'].apply(mdates.date2num)
        ohlc = data[['timestamp', 'open', 'high', 'low', 'close']].values
        fig, ax = plt.subplots(figsize=(10, 5))
        candlestick_ohlc(ax, ohlc, width=0.6, colorup='g', colordown='r', alpha=0.8)
        ax.xaxis_date()
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        ax.set_title(f'{symbol} Candlestick Chart')
        ax.set_xlabel('Date')
        ax.set_ylabel('Price')
        plt.show()
    else:
        logging.error("Failed to fetch historical data for chart.")

# Fetch and preprocess data
symbol = 'BTC/USD'  # Example cryptocurrency symbol
timeframe = '1m'
limit = 10000
data = asyncio.run(fetch_and_preprocess_data_with_target_async(symbol, timeframe, limit))

# Check whether data is empty
if not data.empty:
    # Train or load the model
    model = retrain_or_load_model(symbol)
    if model is not None:
        # Start real-time trading (note: this loop runs indefinitely, so the
        # candlestick chart below is only reached if trading stops)
        asyncio.run(start_real_time_trading(symbol))
    else:
        logging.error("Model training failed. Exiting...")
else:
    logging.error("Data fetching failed. Exiting...")

# Plot the candlestick chart
plot_candlestick_chart(symbol)
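
candlestick_ohlc comes from mplfinance's deprecated original_flavor module; current mplfinance can draw the same chart straight from the DataFrame. A sketch, assuming `data` still has its datetime 'timestamp' column (i.e. before the date2num conversion above):

import mplfinance as mpf

ohlc = data.set_index('timestamp')[['open', 'high', 'low', 'close', 'volume']]
ohlc.columns = ['Open', 'High', 'Low', 'Close', 'Volume']  # column names mplfinance expects
mpf.plot(ohlc, type='candle', title=f'{symbol} Candlestick Chart')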

8__________________________________________________________________

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
import matplotlib.pyplot as plt
import ccxt
import time
import joblib
import json
import logging

# Set up the logging configuration
logging.basicConfig(filename='model_training.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Set up the Kraken exchange
exchange = ccxt.kraken()

# Simulate fetching live data
def fetch_live_data():
    try:
        # Fetch OHLCV data
        ohlcv = exchange.fetch_ohlcv('DOGE/USD', '1m', limit=50)
        # Convert the OHLCV data to a DataFrame
        df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        # Convert the timestamp to datetime format
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        # Handle missing values
        df.dropna(inplace=True)
        # Check if there are enough data points
        if len(df) < 50:
            print("Insufficient data points. Exiting...")
            return pd.DataFrame()
        # Use the most recent data point for prediction
        real_time_data = df.iloc[-1:].drop(['timestamp'], axis=1)  # Remove 'timestamp' but keep the other columns

        return real_time_data

    except Exception as e:
        print(f"Error fetching live data: {e}")
        return pd.DataFrame()

# Function to fetch and preprocess data with target
def fetch_and_preprocess_data_with_target(symbol, timeframe, limit, max_retries=3, retry_delay=5):
    retries = 0
    while retries < max_retries:
        try:
            # Fetch OHLCV data
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)
            # Convert the OHLCV data to a DataFrame
            df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
            # Convert the timestamp to datetime format
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
            # Handle missing values
            df.dropna(inplace=True)
            # Calculate the percentage gain or loss based on closing prices
            df['target'] = df['close'].pct_change() * 100
            # Remove NaN values produced by the pct_change operation
            df.dropna(inplace=True)
            # Remove outliers in the 'close' price column
            df = remove_outliers(df, 'close')
            # Create moving averages
            df = create_moving_averages(df, 'close', [10, 50])
            # Normalize the numerical features
            df = normalize_numerical_features(df, ['open', 'high', 'low', 'close', 'volume', 'ma10', 'ma50', 'target'])

            return df
        except ccxt.NetworkError as e:
            print('Network error:', e)
            print(f"Retrying ({retries+1}/{max_retries})...")
            retries += 1
            time.sleep(retry_delay)
        except ccxt.ExchangeError as e:
            print('Exchange error:', e)
            return pd.DataFrame()  # Return an empty DataFrame on exchange error
        except Exception as e:
            print('Error:', e)
            return pd.DataFrame()  # Return an empty DataFrame for any other error
    print("Max retries exceeded. Unable to fetch data.")
    return pd.DataFrame()  # Return an empty DataFrame if max retries are exceeded

# Function to remove outliers
def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

# Function to create moving averages
def create_moving_averages(df, column, window_sizes):
    for window_size in window_sizes:
        df[f'ma{window_size}'] = df[column].rolling(window=window_size).mean()
    return df

# Function to normalize numerical features
def normalize_numerical_features(df, columns_to_normalize):
    for column in columns_to_normalize:
        df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())
    return df

# Function to calculate position size based on a percentage of capital
def calculate_position_size(current_capital, crypto_price, position_percentage):
    return (current_capital * position_percentage) / crypto_price

# Function to save the model and its metadata
def save_model_and_metadata(best_estimator, symbol, timeframe, mse, f1):
    model_filename = f'xgboost_model_{symbol.replace("/", "_")}_{timeframe}.pkl'
    joblib.dump(best_estimator, model_filename)
    metadata = {
        'crypto_symbol': symbol,
        'timeframe': timeframe,
        'model_filename': model_filename,
        'mse': float(mse),        # cast numpy scalars so the json module can serialize them
        'f1_score': float(f1),
        'timestamp': str(pd.Timestamp.now())
    }
    metadata_filename = f'model_metadata_{symbol.replace("/", "_")}_{timeframe}.json'
    with open(metadata_filename, 'w') as f:
        json.dump(metadata, f)

    logging.info(f"Model saved as: {model_filename}")
    logging.info(f"Metadata saved as: {metadata_filename}")

# Fetch and preprocess data
symbol = 'DOGE/USD'  # Example cryptocurrency symbol
# Note: Kraken serves only a subset of these intervals (e.g. 1m, 5m, 15m, 30m, 1h);
# unsupported ones will fall through to the ExchangeError path above
timeframes = ['1m', '5m', '10m', '15m', '20m', '25m', '30m', '35m', '40m', '45m', '50m', '55m', '1h']
limit = 20000  # Increased amount of data to train larger models

for timeframe in timeframes:
    logging.info(f"Training and evaluating model for timeframe: {timeframe}")
    data = fetch_and_preprocess_data_with_target(symbol, timeframe, limit)

    # Check whether data is empty
    if not data.empty:
        # Split the data into features and target
        X = data.drop(['timestamp', 'target'], axis=1)
        y = data['target']

        # Define hyperparameters for the grid search
        param_grid = {
            'n_estimators': [50, 100, 150],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.05, 0.1, 0.2],
            'subsample': [0.8, 0.9, 1.0],
            'colsample_bytree': [0.8, 0.9, 1.0]
        }

        # Time series cross-validation
        tscv = TimeSeriesSplit(n_splits=5)

        # Initialize the XGBoost regressor
        xgb_model = xgb.XGBRegressor()

        # Grid search
        grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                                   cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X, y)

        # Retrieve the best estimator
        best_estimator = grid_search.best_estimator_

        # Evaluate the best model (in-sample)
        predictions = best_estimator.predict(X)
        mse = mean_squared_error(y, predictions)
        f1 = f1_score(y > 0, predictions > 0)

        logging.info(f"MSE for {timeframe}: {mse}")
        logging.info(f"F1 Score for {timeframe}: {f1}")

        # Save the model and metadata
        save_model_and_metadata(best_estimator, symbol, timeframe, mse, f1)

    else:
        logging.error(f"Data fetching failed for timeframe: {timeframe}")

logging.info("Training and evaluation completed for all timeframes.")
