# NOTE(review): the first two lines of this file are garbled ("Co Digit Ooo"
# twice) — most likely a lost module docstring/header. Restore from version
# control.
import os
import pathlib
from datetime import datetime, time, timedelta
from pathlib import Path, PurePath
from typing import Optional

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import polars as pl
import pytz
import torch
import torch.jit
import torch.nn as nn
from colorama import init, Fore, Back, Style
from polars.exceptions import ComputeError
def setup_file_paths():
    """Resolve this script's absolute path and chdir to its directory.

    Side effect: the current working directory is changed to the directory
    containing this file, so relative file names resolve next to the script.

    Returns:
        Path: the absolute path of this script file.
    """
    # Directory where the script is located
    directory_script = Path(__file__).resolve().parent
    # Absolute script path. resolve() already yields an absolute path, so a
    # plain '/' join is enough — no PurePath re-join needed.
    absolute_path = directory_script / Path(__file__).name
    # change the current working directory
    os.chdir(str(directory_script))
    return absolute_path
class CSVProcessor:
    """Process CSV files against a defined schema.

    Attributes:
        default_schema (dict): default schema for the data.
        schema (dict): schema used to process the data.
    """

    def validate_input(self):
        # Stub: input validation is not implemented yet.
        # NOTE(review): define and document the expected input contract here.
        pass
    def csv_processor(
        self,
        show: bool = False,
        return_tensor: bool = False,
        start_date: str = None,
        end_date: str = None,
        generate_csv_file: bool = False,
    ) -> tuple[pl.DataFrame, Optional[torch.Tensor]]:
        """Process a CSV file and optionally return a Polars DataFrame and a
        PyTorch tensor.

        Args:
            show (bool, optional): Show the data frame. Defaults to False.
            return_tensor (bool, optional): return a tensor. Defaults to False.
            start_date (str, optional): the start date for the selection
                (format: "YYYY-MM-DD"). Defaults to None.
            end_date (str, optional): the end date for the selection
                (format: "YYYY-MM-DD"). Defaults to None.
            generate_csv_file (bool, optional): if True, also write the
                processed frame back to ``self.csv_main_file_name``.
                Defaults to False.

        Returns:
            tuple[pl.DataFrame, torch.Tensor]: a polars data frame and a
            tensor.

        Example:
            >>> df, tensor = csv_processor(show=True, start_date="2019-01-7",
            ...                            end_date="2019-01-8")
        """
        # NOTE(review): this method is corrupted in this copy of the file.
        # The step that creates `df` (the CSV scan/read) and the `except`
        # clause of the `try` below are missing; `return_tensor` is never
        # used because the tensor-conversion step is also missing. Recover
        # them from version control.
        try:
            # Second, convert the 'time' column to the New York time zone,
            # considering daylight saving time (UTC-4 in summer, UTC-5 in
            # winter).
            df = df.with_columns(
                pl.col("column_1").dt.convert_time_zone("America/New_York")
            )
            # rename columns
            df = df.rename(
                {
                    "column_1": "Date",
                    "column_3": "Open",
                    "column_4": "High",
                    "column_5": "Low",
                    "column_6": "Close",
                }
            )
            # convert the strings dates to right format
            if start_date is not None:
                start_date = datetime.strptime(start_date, "%Y-%m-%d").astimezone(
                    pytz.timezone("America/New_York")
                )
            if end_date is not None:
                end_date = datetime.strptime(end_date, "%Y-%m-%d").astimezone(
                    pytz.timezone("America/New_York")
                )
            # filter the table by start and end date
            # NOTE(review): `start_date or end_date is not None` parses as
            # `start_date or (end_date is not None)` — probably intended
            # `start_date is not None or end_date is not None`; confirm and
            # fix. With only one bound given, the comparison against None
            # below would also misbehave.
            if start_date or end_date is not None:
                df = df.filter(
                    (pl.col("Date") >= start_date) & (pl.col("Date") <= end_date)
                )
            else:
                df_tensor = None
            if show is True:
                print(df.shape)
                print(
                    "\nData frame:\n",
                    df,
                    "\nData frame converted to tensor:\n",
                    df_tensor,
                )
            if generate_csv_file is True:
                # Define the path of CSV file
                path = pathlib.Path(self.csv_main_file_name)
                # Delete the time information
                df = df.with_columns(
                    pl.col("Date").dt.to_string("%Y-%m-%d %H:%M:%S").alias("Date")
                )
                # Write the data frame to CSV file
                df.write_csv(path)
        # NOTE(review): the `except`/`return` of this method, and the
        # `def group_and_write_csv_by(self, time_frame)` line that the
        # docstring below belongs to, are missing from this copy of the file.
        """
        Args:
            time_frame (str): time frame, like 5m, 10m, 15m, 1h, etc...
        Returns:
            pl.Dataframe: A new data frame
        Example:
            >>> df = roup_and_write_csv_by('1m') # 1-minute candlestick
            >>> df = roup_and_write_csv_by('1h') # 1-hour candlestick
        """
def scan_csv(
self, time_frame, materialize: bool = False
) -> pl.LazyFrame | pl.DataFrame:
"""Scans a CSV file and returns a LazyFrame or a DataFrame.
Args:
csv_file (str): Path to the CSV file.
materialize (bool, optional): If True, returns a materialized
DataFrame. Defaults to False.
Returns:
pl.LazyFrame | pl.DataFrame: A LazyFrame if materialize is False,
or a DataFrame if materialize is True.
"""
# file name that will be scan when the function is called
csv_file_name = f"{self.csv_name}_{time_frame}.csv"
def scan():
# headers: Date,Open,High,Low,Close
# schema == data type per each column, establish it for greater
scanning efficiency
# scan with a schema
df = pl.scan_csv(
csv_file_name, has_header=True, schema_overrides=self.schema
)
# convert text to time (data time)
df = df.with_columns(
pl.col("Date")
.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
.alias("Date")
)
return df
if os.path.exists(csv_file_name):
df = scan()
else:
self.group_and_write_csv_by(time_frame)
if materialize is True:
df = df.collect()
return df
    def __init__(self):
        """Run the output-format check on construction."""
        # check_format is currently a no-op stub.
        self.check_format()
    def check_format(self):
        """Check that the output format is correct (not implemented yet)."""
        # Stub: no validation is performed.
        pass
def strategy_1(
self,
df: pl.DataFrame,
entry_time: str = "8:00",
order: str = "buy",
tp: int = 10,
sl: int = 2,
limit_time: str = "13:00:00",
lot_size: int = 0.05,
) -> pl.DataFrame:
"obtener el tiempo"
try:
hours, minutes = map(int, entry_time.split(":"))
except ValueError as exc:
raise ValueError(f"The format of entry_time must be 'HH:MM'.{exc}")
from exc
# get data frame filtered by time
df = df.filter(
(pl.col("Date").dt.hour() == hours)
& (pl.col("Date").dt.minute() == minutes)
)
# rename column
df = df.rename(
{
"Date": "entry_time",
}
)
# Get tp and sl
df = df.with_columns(
[
(pl.col("High") + int(tp)).alias("tp"),
(pl.col("Low") - int(sl)).alias("sl"),
]
)
#
=========================================================================== #
# This part is to get max and min values, do not delete this logic, just
optimize.
#
=========================================================================== #
#
=========================================================================== #
# This part indicates the time limit of the operation
# (this part is extremely important to avoid excessive ram consumption in
later filters).
#
=========================================================================== #
hour, minute, second = limit_time.split(":")
# create Date_max column
df = df.with_columns(
pl.datetime(
pl.col("entry_time").dt.year(),
pl.col("entry_time").dt.month(),
pl.col("entry_time").dt.day(),
int(hour), # new hour
int(minute), # new minute
int(second), # new second
).alias("Date_max")
)
return df
def strategy_2(
self,
df: pl.DataFrame,
lower_time_limit: str = "8:00",
upper_time_limit: str = "13:00",
min_candle_body: int = 2,
tp: int = 10,
sl: int = 3,
):
try:
lower_time_limit = datetime.strptime(lower_time_limit, "%H:%M").time()
upper_time_limit = datetime.strptime(upper_time_limit, "%H:%M").time()
except ValueError as exc:
raise ValueError(f"The format of time must be 'HH:MM'.{exc}") from exc
# Create a Time column to filter
df = df.with_columns(pl.col("Date").dt.time().alias("Time"))
#
=========================================================================== #
# This part is to get max and min values, do not delete this logic, just
optimize.
#
=========================================================================== #
#
=========================================================================== #
# This part indicates the time limit of the operation
# (this part is extremely important to avoid excessive ram consumption in
later filters).
#
=========================================================================== #
# Get Date_max column
df = df.with_columns(
pl.col("Date")
.dt.date() # Extract only the date from the column 'Date'.
.cast(pl.Datetime) # Convert to datetime
.dt.offset_by(
f"{upper_time_limit.hour}h{upper_time_limit.minute}m"
) # Add the time stored in the variable
.alias("Date_max") # New column with the resulting datetime
)
# rename column
df = df.rename(
{
"Date": "entry_time",
}
)
return df
    def strategy_3(self):
        """Placeholder for a future strategy (not implemented)."""
        pass
        # ...
    def validate_format(self):
        """Validate the format of the input data (not implemented yet)."""
        # Check if the input data is in the correct format
        pass
    # NOTE(review): the `def` line and opening docstring quotes of this
    # method are missing from this copy of the file; judging by the
    # docstring fragment below, it took `parameters: pl.DataFrame` and
    # `df_1m: pl.DataFrame`. Recover the signature from version control.
    """
    Args:
        parameters (pl.DataFrame): parameters to make the review
        df_1m (pl.DataFrame): the data base
    """
    # declare variables
    entry_time = parameters["entry_time"]
    max_v = parameters["max"]
    min_v = parameters["min"]
    date_max = parameters["Date_max"]
    max_db = df_1m["High"]
    min_db = df_1m["Low"]
    # list for storing the data frames of one row
    data_frames = []
    # loop to return the closing date of the trade
    for entry_time_i, max_i, min_i, date_max_i, max_db_i, min_db_i in zip(
        entry_time, max_v, min_v, date_max, max_db, min_db
    ):
        # NOTE(review): `a` is assigned but never used — candidate for removal.
        a = entry_time_i, max_i, min_i, date_max_i, max_db_i, min_db_i
        "In this section, is going to apply 3 filters"
        # print(f"Date Range: {entry_time_i} TO {date_max_i}\nPrice Range:
        #       {min_i} TO {max_i}")
        # Select only the necessary columns
        df_1m_limits = df_1m.select(["Date", "High", "Low"])
        # First, filter by time. Filter the database
        df_filtered_by_time = df_1m_limits.filter(
            (pl.col("Date") >= entry_time_i) & (pl.col("Date") <= date_max_i)
        )
        # Second, filter by high and low. Filter the database
        df = df_filtered_by_time.filter(
            (pl.col("High") >= max_i) | (pl.col("Low") <= min_i)
        )
        # Third, get only the first value that fulfils the condition
        df = df.head(1)
        # if null, then close the order within the maximum time limit.
        if df.is_empty():
            df = df_filtered_by_time.select(pl.all().last())
        else:
            pass
        # Fourth, stores the data frame in a list
        data_frames.append(df)
    # Join data frames
    df_concat = pl.concat(data_frames)
    # rename
    df_concat = df_concat.rename(
        {
            "Date": "exit_time",
        }
    )
    # Concatenate both data frames and delete unnecessary column
    df_concat = pl.concat([parameters, df_concat], how="horizontal").drop(
        "Date_max"
    )
    # NOTE(review): this method appears truncated here (no `return` of
    # `df_concat`), and the `def` line of the next method — whose docstring
    # fragment follows — is also missing from this copy of the file.
    """
    Args:
        parameters (pl.DataFrame): _description_
        df_1m (pl.DataFrame): _description_
    """
    # <==============================================================>#
    # <==================== GPU BACKTESTER ===========================>#
    # <==============================================================>#
    pass
def new_function(self):
print("Here will be a new function")
def write_csv(
self, df: pl.DataFrame, file_name: str = "back_test", show: bool = False
) -> None:
"""_summary_
Args:
df (pl.DataFrame): Dataframe to be written into CSV file
file_name (str, optional): Name of the output CSV file. Defaults to
"back_test".
show (bool, optional): Show results if you want. Defaults to False.
"""
# Write a DataFrame to a CSV file
# < ============================================================>#
# <==================== SAVE IN CVS FILE =======================>#
# < ============================================================>#
# In order to avoid wrong formats in 'Date' column, time is converted to
text.
df = df.with_columns(
pl.col(["entry_time", "exit_time"]).dt.to_string("%Y-%m-%d %H:%M:%S")
)
# generate the path
csv_path = f"{file_name}.csv"
# create a csv file by writing the data frame
df.write_csv(pathlib.Path(csv_path))
if show is True:
print(f"Data saved in {csv_path}")
print(df)
    def save_graph_as_png(
        self,
        df: pl.DataFrame,
        file_name: str = "chart",
        title: str = "Graph",
        x_title: str = "exit_time",
        y_title: str = "Cumulative_utility",
    ) -> None:
        """Plot a utility curve from a DataFrame as a line chart.

        Args:
            df (pl.DataFrame): Dataframe to be graphed and saved as png file.
            file_name (str, optional): Name of the output png file. Defaults
                to "chart".
            title (str, optional): Title of the graph. Defaults to "Graph".
            x_title (str, optional): Dataframe's column name used as x axis.
                Defaults to "exit_time".
            y_title (str, optional): Dataframe's column name used as y axis.
                Defaults to "Cumulative_utility".
        """
        # < ============================================================>#
        # < =============== SAVE UTILITY IN PNG FILE ===================>#
        # < ============================================================>#
        # create figure
        fig = go.Figure(
            go.Scatter(
                x=df[x_title],
                y=df[y_title],
                mode="lines",
                name=title,
            )
        )
        # NOTE(review): the step that actually writes the PNG (presumably
        # something like fig.write_image(f"{file_name}.png")) is missing from
        # this copy of the file — `fig` and `file_name` are currently unused.
        # Recover it from version control.
# Run back-tests with millions of different parameter combinations until the
# right strategy is found.
class IntensiveBackTesting:
    """Back-test a strategy across many parameter combinations.

    All collaborators (data, strategy, risk management, visualizer and
    evaluator) are injected at construction time so each can be swapped
    independently.
    """

    def __init__(self, df, strategy, risk_management, data_visualizer,
                 strategy_eval):
        # Store the injected collaborators; construction does no other work.
        for attr_name, value in (
            ("df", df),
            ("strategy", strategy),
            ("risk_management", risk_management),
            ("data_visualizer", data_visualizer),
            ("strategy_eval", strategy_eval),
        ):
            setattr(self, attr_name, value)
if __name__ == "__main__":
    # NOTE(review): `main_test` is not defined anywhere in this copy of the
    # file — running this module as a script raises NameError. Confirm where
    # it is defined or restore it from version control.
    main_test()