0% found this document useful (0 votes)
34 views

Split and Create Py File

This Python script performs the following tasks: 1. It combines all CSV files in the current directory into a single file called "combine_param.csv" and deletes the original files. 2. It removes any duplicate rows from the combined data. 3. It keeps only the rows in which a user-selected column is True. 4. It splits the rows for each index (BANKNIFTY, NIFTY, FINNIFTY) into a user-chosen number of CSV files, written to the output path (the current directory by default). 5. It optionally converts an IPython notebook into a Python script and creates one customized copy of it per parameter file.

Uploaded by

kakashi hatake
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
34 views

Split and Create Py File

This Python script performs the following tasks: 1. It combines all CSV files in the current directory into a single file called "combine_param.csv" and deletes the original files. 2. It removes any duplicate rows from the combined data. 3. It keeps only the rows in which a user-selected column is True. 4. It splits the rows for each index (BANKNIFTY, NIFTY, FINNIFTY) into a user-chosen number of CSV files, written to the output path (the current directory by default). 5. It optionally converts an IPython notebook into a Python script and creates one customized copy of it per parameter file.

Uploaded by

kakashi hatake
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 3

import os, sys

import numpy as np
import pandas as pd
from glob import glob

### combine parameter

print("Combine Parameters")
all_parameters = glob('*.csv')

### Combine ###

# Read every CSV once and concatenate in a single call; calling pd.concat
# inside the loop re-copies the accumulated frame for every file.
frames = []
for file in all_parameters:
    print(file)
    frames.append(pd.read_csv(file))

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when the directory holds no CSV files.
Combine_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Write the merged file BEFORE deleting the inputs, so a failed write
# (disk full, permission error, ...) cannot lose the source data.
Combine_data.to_csv("combine_param.csv", index=False)

for file in all_parameters:
    # A combine_param.csv left over from an earlier run is itself one of the
    # inputs; it has just been overwritten with the merged result and must
    # not be deleted.
    if not os.path.samefile(file, "combine_param.csv"):
        os.remove(file)
# --------------------------------------------------------------------------

### Remove Duplicates ###

if not Combine_data.empty:
    print("Remove Duplicates")
    rows_before = len(Combine_data)
    print('Total Rows -', rows_before)

    # Keep the first occurrence of every fully identical row.
    Combine_data = Combine_data.drop_duplicates()
    rows_after = len(Combine_data)
    print('Dublicates Rows -', rows_before - rows_after)
# ---------------------------------------------------------------------------

# remove false rows

if not Combine_data.empty:
    print("\nRemove False Parameter")
    # Show the columns as a numbered list so the user can pick one by position.
    print(Combine_data.columns.to_series().reset_index(drop=True))

    column_names = Combine_data.columns.to_list()
    # Re-prompt instead of crashing on a typo or an out-of-range position.
    # Negative positions keep working as before (they count from the end).
    while True:
        answer = input('Column True/False Index : ')
        try:
            col_index = int(answer)
            col_name = column_names[col_index]
        except (ValueError, IndexError):
            print(f'Invalid column index {answer!r}; enter one of the numbers listed above.')
        else:
            break

    initial_length = len(Combine_data)
    print('Total Rows -', initial_length)

    # Keep only the rows whose flag column equals True. The comparison is
    # element-wise on the Series, so `== True` is intentional here.
    Combine_data = Combine_data[Combine_data[col_name] == True]  # noqa: E712
    final_length = len(Combine_data)
    print('Falsed Rows -', initial_length - final_length)
# ---------------------------------------------------------------------------

# Destination prefix for the split files. It is concatenated directly in front
# of the file name, so a non-empty value must end with a path separator.
# Defined outside the guard below so that code further down that reads
# output_path does not hit a NameError when there were no rows to split.
output_path = ''

if not Combine_data.empty:

    if output_path != '' and not os.path.isdir(output_path):
        os.mkdir(output_path)

    # (value in the 'index' column, label printed in the summary, file prefix)
    index_groups = [
        ('BANKNIFTY', 'BANKNIFTY Rows -', 'bn'),
        ('NIFTY', 'NIFTY Rows-', 'nf'),
        ('FINNIFTY', 'FINNIFTY Rows-', 'fn'),
    ]
    subsets = {
        index_name: Combine_data[Combine_data['index'] == index_name]
        for index_name, _, _ in index_groups
    }

    print('Total Rows -', len(Combine_data))
    for index_name, label, _ in index_groups:
        print(label, len(subsets[index_name]))

    for index_name, _, prefix in index_groups:
        subset = subsets[index_name]
        if subset.empty:
            continue

        # Ask until we get a positive whole number; np.array_split rejects
        # zero or negative section counts.
        while True:
            try:
                no_of_param = int(input(f"{index_name} Parameter No : "))
            except ValueError:
                no_of_param = 0
            if no_of_param > 0:
                break
            print("Enter a whole number greater than 0.")

        # Split the row *positions* rather than the DataFrame itself:
        # np.array_split on a DataFrame goes through DataFrame.swapaxes,
        # which pandas has deprecated. The chunk sizes are the same as before.
        row_positions = np.arange(len(subset))
        for idx, positions in enumerate(np.array_split(row_positions, no_of_param)):
            subset.iloc[positions].to_csv(f"{output_path}{prefix}_{idx}.csv", index=False)

# Stop here unless the user explicitly opts in with "y"; everything after
# this point is the optional script-generation step.
if input("create py files y/n ? : ") != "y":
    raise SystemExit

import os
import sys

### convert into py files

# Convert every notebook in the current directory into a script.
os.system('jupyter nbconvert --to script *.ipynb')

def prepend_line(file_name, line, name):
    """Write a copy of *file_name* with *line* inserted at the top, saved as *name*.

    The original file is only read, never modified. The copy is first built in
    a temporary file called ``file_name + '.bak'`` and then moved to *name*.

    Args:
        file_name: Path of the text file to copy.
        line: Text to place before the original content; a newline is
            appended after it.
        name: Destination path of the resulting file. An existing file at
            this path is overwritten.
    """
    # Temporary file that receives the new content.
    dummy_file = file_name + '.bak'
    with open(file_name, 'r') as read_obj, open(dummy_file, 'w') as write_obj:
        # New first line(s), then the original content streamed line by line.
        write_obj.write(line + '\n')
        write_obj.writelines(read_obj)

    # os.replace overwrites an existing destination on every platform;
    # os.rename raises FileExistsError on Windows when *name* already exists
    # (for example when the script is run a second time).
    os.replace(dummy_file, name)

def code_script(name):
    """Return the Python header that is prepended to each generated script.

    The header imports ``os``/``sys``/``ctypes``, derives ``code_name`` from
    the running script's file name, sets the Windows console title to it,
    prints it, and assigns *name* to ``parameter_path``.

    Args:
        name: Path of the parameter CSV the generated script should use.

    Returns:
        The header source code as a string ending with a newline.
    """
    header_lines = [
        "import os",
        "import sys",
        # Emitted into the generated file as split('\\'), i.e. split on a
        # single backslash.
        "code_name = sys.argv[0].split('\\\\')[-1].replace('.py', '')",
        "import ctypes",
        "ctypes.windll.kernel32.SetConsoleTitleW(code_name)",
        "print(code_name)",
        # repr() yields a correctly escaped string literal. Pasting the raw
        # path between quotes turns Windows paths such as out\nf_0.csv into
        # escape sequences (here: a newline) in the generated script.
        f"parameter_path = {name!r}",
    ]
    return "\n".join(header_lines) + "\n"

# Every CSV at the output location is a parameter file, except the merged
# master file. Sorted so the scripts are generated in a stable order.
parameters = sorted(set(glob(f'{output_path}*.csv')) - {"combine_param.csv"})

notebooks = glob("*.ipynb")
if not notebooks:
    raise SystemExit("No .ipynb notebook found in the current directory.")

# Assumes the notebook conversion produced <stem>.py next to <stem>.ipynb.
# Only the extension is swapped; str.replace('ipynb', 'py') would also rewrite
# an 'ipynb' occurring elsewhere in the file name.
code = os.path.splitext(notebooks[0])[0] + '.py'

# Comment out any existing parameter_path assignment in the converted script.
# The file is read completely before it is rewritten: writing through a second
# handle while still reading it overwrites lines that have not been read yet,
# because every replaced line grows by two characters.
with open(code, 'r') as script:
    script_lines = script.readlines()
with open(code, 'w') as script:
    script.writelines(
        text.replace('parameter_path =', '# parameter_path =') for text in script_lines
    )

# One script per parameter file: <stem>.csv -> <stem>.py, each starting with a
# header that points parameter_path at its own CSV.
for file in parameters:
    prepend_line(code, code_script(file), os.path.splitext(file)[0] + '.py')

# The converted template is no longer needed once the copies exist.
os.remove(code)

You might also like