# Python code
import sqlite3
import pandas as pd
# Locations of the SQLite database and the CSV inputs (relative paths).

# SQLite database file that receives all inserted rows.
DATABASE_PATH = "database.db"

# CSV that is loaded and inserted into the database without transformation.
SPREADSHEET_0 = "spreadsheet_0.csv"

# CSVs that are merged with each other on 'shipping_identifier' before insertion.
SPREADSHEET_1 = "spreadsheet_1.csv"
SPREADSHEET_2 = "spreadsheet_2.csv"
def load_data(file_path) -> pd.DataFrame:
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV data. The value is handed straight to
            ``pandas.read_csv``, so anything that function accepts works here.

    Returns:
        The parsed contents, using ``pandas.read_csv`` defaults (the first
        row is taken as the header).
    """
    return pd.read_csv(file_path)
def insert_data_to_db(data, table_name, conn):
    """Append the rows of a pandas DataFrame to a database table.

    Args:
        data: DataFrame whose rows are written. Its index is not stored.
        table_name: Name of the destination table.
        conn: Open database connection passed through to
            ``DataFrame.to_sql``; the caller remains responsible for
            closing it.

    Returns:
        None.
    """
    # if_exists='append' adds rows to an existing table instead of failing or
    # replacing it, so repeated calls accumulate rows.
    data.to_sql(table_name, conn, if_exists='append', index=False)
def process_spreadsheets():
    """Load the three spreadsheets and insert their contents into SQLite.

    Steps:
        1. Open the database at ``DATABASE_PATH``.
        2. Insert ``SPREADSHEET_0`` unchanged into ``table_name_0``.
        3. Merge ``SPREADSHEET_1`` and ``SPREADSHEET_2`` on
           ``shipping_identifier``, add a ``total_quantity`` column holding
           the summed ``quantity`` of each shipment, and insert the selected
           columns into ``table_name_combined``.

    The table names and the selected column names are placeholders and must
    be adjusted to the real schema.

    Returns:
        None.

    Raises:
        Any exception raised while reading the CSV files, merging, or writing
        to the database propagates to the caller; the connection is closed
        first.
    """
    # Connect to SQLite database
    conn = sqlite3.connect(DATABASE_PATH)
    try:
        # Load spreadsheet 0 and insert directly
        data_0 = load_data(SPREADSHEET_0)
        # Replace 'table_name_0' with actual table name
        insert_data_to_db(data_0, 'table_name_0', conn)

        # Load spreadsheets 1 and 2
        data_1 = load_data(SPREADSHEET_1)
        data_2 = load_data(SPREADSHEET_2)

        # Merge spreadsheet 1 and 2 on shipping identifier
        # NOTE(review): merge() defaults to an inner join, so shipments
        # present in only one spreadsheet are dropped; confirm this is intended.
        combined_data = data_1.merge(data_2, on='shipping_identifier')

        # Calculate total quantity per shipment; transform('sum') keeps one
        # value per row so it can be assigned back as a column.
        combined_data['total_quantity'] = (
            combined_data.groupby('shipping_identifier')['quantity'].transform('sum')
        )

        # Prepare the final format for insertion
        # Adjust column names as per schema
        final_data = combined_data[['product', 'origin', 'destination', 'total_quantity']]

        # Insert the processed data into the database
        # Replace 'table_name_combined' with actual table name
        insert_data_to_db(final_data, 'table_name_combined', conn)
    finally:
        # Close the connection even when a load, merge or insert fails.
        conn.close()
# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    process_spreadsheets()