0% found this document useful (0 votes)
42 views

Split and Create Py File

This Python script performs the following tasks: 1. It combines all CSV files in the current directory into a single file called "combine_param.csv". 2. It then removes any duplicate rows from the combined file. 3. It filters out any rows where a specified column contains a false value. 4. It splits the data into separate files based on unique index values and writes them to a new directory. 5. It optionally converts an IPython notebook into Python scripts and customizes each one for the different parameter files.

Uploaded by

kakashi hatake
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
42 views

Split and Create Py File

This Python script performs the following tasks: 1. It combines all CSV files in the current directory into a single file called "combine_param.csv". 2. It then removes any duplicate rows from the combined file. 3. It filters out any rows where a specified column contains a false value. 4. It splits the data into separate files based on unique index values and writes them to a new directory. 5. It optionally converts an IPython notebook into Python scripts and customizes each one for the different parameter files.

Uploaded by

kakashi hatake
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 3

import os, sys

import numpy as np
import pandas as pd
from glob import glob

### Combine parameters ###
# Merge every CSV in the current directory into one DataFrame, save it as
# "combine_param.csv", and then delete the source files.

print("Combine Parameters")
all_parameters = glob('*.csv')

frames = []
for file in all_parameters:
    print(file)
    frames.append(pd.read_csv(file))

# Concatenate once instead of re-copying a growing frame on every iteration.
# pd.concat rejects an empty list, so fall back to an empty frame when the
# directory holds no CSV files.
Combine_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Write the combined file BEFORE deleting the inputs, so a failed write cannot
# destroy the only copy of the data.
Combine_data.to_csv("combine_param.csv", index=False)

combined_name = os.path.normcase("combine_param.csv")
for file in all_parameters:
    # A "combine_param.csv" left over from an earlier run is matched by the
    # glob and merged above; it has just been rewritten, so it must survive.
    if os.path.normcase(file) != combined_name:
        os.remove(file)
# --------------------------------------------------------------------------

### Remove Duplicates ###
# Drop fully identical rows (first occurrence is kept) and report how many
# rows were discarded.

if not Combine_data.empty:
    print("Remove Duplicates")
    initial_length = len(Combine_data)
    print('Total Rows -', initial_length)

    # Mask of rows that repeat an earlier row; keep everything else.
    repeated_rows = Combine_data.duplicated()
    Combine_data = Combine_data[~repeated_rows]

    final_length = len(Combine_data)
    print('Dublicates Rows -', initial_length - final_length)
# ---------------------------------------------------------------------------

### Remove False Rows ###
# Keep only the rows whose user-selected flag column equals True.

if not Combine_data.empty:
    print("\nRemove False Parameter")
    # Show each column name next to its positional index so the user can pick.
    print(Combine_data.columns.to_series().reset_index(drop=True))

    column_names = Combine_data.columns.to_list()
    # Re-prompt instead of crashing on a typo or an out-of-range index.
    # Negative indices stay valid (they count from the end, as before).
    while True:
        try:
            col_index = int(input('Column True/False Index : '))
            col_name = column_names[col_index]
        except (ValueError, IndexError):
            print(f'Enter a column index between 0 and {len(column_names) - 1}')
        else:
            break

    initial_length = len(Combine_data)
    print('Total Rows -', initial_length)

    # Drop rows whose flag is not True. The explicit comparison is kept
    # (rather than truthiness) so that only values equal to True survive.
    Combine_data = Combine_data[Combine_data[col_name] == True]
    final_length = len(Combine_data)
    print('Falsed Rows -', initial_length - final_length)
# ---------------------------------------------------------------------------

### Split per index ###
# Partition the combined rows by the value of their 'index' column
# (BANKNIFTY / NIFTY / FINNIFTY) and write each partition as a user-chosen
# number of near-equal CSV chunks.


def _ask_section_count(prompt):
    """Prompt until the user enters a whole number greater than zero."""
    while True:
        try:
            count = int(input(prompt))
        except ValueError:
            count = 0
        if count > 0:
            return count
        print('Enter a whole number greater than 0')


def _write_chunks(frame, sections, prefix, directory):
    """Write `frame` as `sections` CSV files named `<prefix>_<n>.csv`.

    Chunk sizes follow np.array_split: the first ``len(frame) % sections``
    chunks hold one extra row. When `sections` exceeds the row count the
    surplus files contain only the header.
    """
    # Split the row positions rather than the DataFrame itself:
    # np.array_split on a DataFrame goes through DataFrame.swapaxes, which
    # pandas has deprecated.
    row_positions = np.arange(len(frame))
    for idx, rows in enumerate(np.array_split(row_positions, sections)):
        target = os.path.join(directory, f"{prefix}_{idx}.csv")
        frame.iloc[rows].to_csv(target, index=False)


if not Combine_data.empty:
    # Directory for the chunk files; '' means the current directory.
    output_path = ''

    if output_path:
        # makedirs also creates missing parents and tolerates an existing dir.
        os.makedirs(output_path, exist_ok=True)

    bn_data = Combine_data[Combine_data['index'] == 'BANKNIFTY']
    nf_data = Combine_data[Combine_data['index'] == 'NIFTY']
    fn_data = Combine_data[Combine_data['index'] == 'FINNIFTY']

    print('Total Rows -', len(Combine_data))
    print('BANKNIFTY Rows -', len(bn_data))
    print('NIFTY Rows-', len(nf_data))
    print('FINNIFTY Rows-', len(fn_data))

    # One pass per index: ask for the chunk count, then write the chunks.
    for label, prefix, subset in (
        ('BANKNIFTY', 'bn', bn_data),
        ('NIFTY', 'nf', nf_data),
        ('FINNIFTY', 'fn', fn_data),
    ):
        if subset.empty:
            continue
        no_of_param = _ask_section_count(f"{label} Parameter No : ")
        _write_chunks(subset, no_of_param, prefix, output_path)

### Create py files ###
# Optionally convert the notebook into a script and emit one copy of it per
# parameter CSV, each copy prefixed with a header that sets `parameter_path`.


def prepend_line(file_name, line, name):
    """Write `line` followed by the contents of `file_name` to the file `name`.

    The text is assembled in a temporary ``<file_name>.bak`` file which is
    then moved to `name`; `file_name` itself is left untouched.

    Args:
        file_name: Path of the script to copy.
        line: Text placed at the top of the new file (a newline is appended).
        name: Destination path; replaced if it already exists.
    """
    dummy_file = file_name + '.bak'
    with open(file_name, 'r', encoding='utf-8') as read_obj, \
            open(dummy_file, 'w', encoding='utf-8') as write_obj:
        write_obj.write(line + '\n')
        for source_line in read_obj:
            write_obj.write(source_line)

    # os.replace overwrites an existing target on every platform, whereas
    # os.rename raises on Windows when `name` already exists.
    os.replace(dummy_file, name)


def code_script(name):
    """Return the header inserted at the top of each generated script.

    The header sets the console title to the script's own name (through the
    Windows-only ``ctypes.windll`` API) and assigns `name` to
    ``parameter_path``.
    """
    # {name!r} emits a correctly escaped string literal, so a path that
    # contains backslashes or quotes still yields valid generated code.
    header = f"""import os
import sys
code_name = sys.argv[0].split('\\\\')[-1].replace('.py', '')
import ctypes
ctypes.windll.kernel32.SetConsoleTitleW(code_name)
print(code_name)
parameter_path = {name!r}
"""
    return header


if input("create py files y/n ? : ") == "y":
    ### convert into py files
    os.system('jupyter nbconvert --to script *.ipynb')

    # NOTE(review): `output_path` is assigned by the splitting step earlier
    # in this script; confirm it is always defined before this point.
    parameters = [
        path for path in glob(os.path.join(output_path, '*.csv'))
        if path != "combine_param.csv"
    ]

    notebooks = glob("*.ipynb")
    if not notebooks:
        print("No .ipynb notebook found - no py files created")
    else:
        # Only the first notebook is used as the template. Swap just the
        # extension; str.replace would rewrite 'ipynb' anywhere in the name.
        code = os.path.splitext(notebooks[0])[0] + '.py'

        # Comment out the notebook's own parameter_path assignment so the
        # generated header's value is the one that takes effect. Read fully,
        # then write: editing the file through a second handle while it is
        # still being read can corrupt it.
        with open(code, 'r', encoding='utf-8') as script_file:
            script_text = script_file.read()
        with open(code, 'w', encoding='utf-8') as script_file:
            script_file.write(
                script_text.replace('parameter_path =', '# parameter_path =')
            )

        for file in parameters:
            # bn_0.csv -> bn_0.py (extension only, not every 'csv' in the path)
            prepend_line(code, code_script(file), os.path.splitext(file)[0] + '.py')

        # The converted template is no longer needed once the copies exist.
        os.remove(code)

You might also like