This Python script extracts data from SQL INSERT statements into CSV files, splits the output across multiple CSV files when a row limit is exceeded, and loads each CSV file into a PostgreSQL database.


import re
import csv
import os
import psycopg2


def extract_data_to_csv(input_file, output_folder, max_rows_per_file, db_params):
    # Define the regular expression pattern to extract values from INSERT
    # statements
    pattern = re.compile(
        r"INSERT\s+\[dbo\]\.\[Mobile\]\s+"
        r"\(\[Id\],\s+\[IC\],\s+\[Name\],\s+\[Mobile_Number\],\s+\[Address1\],\s+"
        r"\[Address2\],\s+\[State_Name\],\s+\[Data_Source_Id\],\s+\[Data_Filename\],\s+"
        r"\[WA_Status\]\)\s+VALUES\s+"
        r"\((\d+),\s+N'([^']+)',\s+N'([^']+)',\s+N'([^']+)',\s+N'([^']*)',\s+"
        r"(?:NULL|N'([^']*)'),\s+N'([^']+)',\s+(\d+),\s+N'([^']+)',\s+(\d+)\)"
    )
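
    # For reference, the pattern is meant to match dump lines shaped like the
    # following (one line in the dump, wrapped here for readability; the sample
    # values are hypothetical, not taken from a real dump):
    #   INSERT [dbo].[Mobile] ([Id], [IC], [Name], [Mobile_Number], [Address1],
    #   [Address2], [State_Name], [Data_Source_Id], [Data_Filename], [WA_Status])
    #   VALUES (1, N'900101015555', N'Ali Bin Abu', N'0123456789',
    #   N'12 Jalan Satu', NULL, N'Selangor', 3, N'dump_01.txt', 1)
    # On such a line, match.groups() yields ten values (None for a NULL
    # Address2): ('1', '900101015555', 'Ali Bin Abu', '0123456789',
    # '12 Jalan Satu', None, 'Selangor', '3', 'dump_01.txt', '1').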

    # Create the output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Initialize connection to the database
    conn = psycopg2.connect(**db_params)
    cur = conn.cursor()

    # Initialize CSV index
    csv_index = 1

    # Initialize row count
    row_count = 0

    # Initialize set to keep track of processed files
    processed_files = set()

    # Open the input SQL file for reading
    with open(input_file, 'r', encoding='utf-16', errors='ignore') as f_in:
        # Initialize the CSV writer, the current file object, and the current path
        csv_writer = None
        csv_file = None
        output_file = None

        # Iterate over lines in the input SQL file
        for line in f_in:
            # Skip lines containing null bytes
            if '\x00' in line:
                continue

            # Match the pattern in each line
            match = pattern.match(line)
            # print("Matched Line:", line)  # Debug: see the matched line
            if match:
                # Extract values from the matched groups
                values = match.groups()
                # print("Extracted Values:", values)  # Debug: see extracted values
                # Remove double quotes from extracted values except for Address1
                values = [value.strip('"') if value is not None and idx != 4 else value
                          for idx, value in enumerate(values)]
                # Replace empty values with None
                values = [None if value == "''" else value for value in values]

                # Open a new CSV file, named by the sequential index, if none
                # is open yet (opening it once per file avoids re-opening the
                # same file for every matched line)
                if output_file is None:
                    output_file = os.path.join(output_folder, f"{csv_index}.csv")
                    csv_file = open(output_file, 'a', newline='', encoding='utf-8')
                    csv_writer = csv.writer(csv_file)
                    # Write the header at the top of each new file
                    csv_writer.writerow(['Id', 'IC', 'Name', 'Mobile_Number',
                                         'Address1', 'Address2', 'State_Name',
                                         'Data_Source_Id', 'Data_Filename',
                                         'WA_Status'])

                # Write the extracted values to the CSV file
                csv_writer.writerow(values)
                row_count += 1

                # Roll over to a new CSV file if the row count reaches the
                # maximum rows per file
                if row_count >= max_rows_per_file:
                    # Close the current CSV file
                    csv_file.close()

                    # Load its data into the database
                    if output_file not in processed_files:
                        copy_csv_to_postgres(output_file, 'bank', cur)
                        print(f"Data from {output_file} copied to database.")
                        # Add the file to the set of processed files
                        processed_files.add(output_file)
                    else:
                        print(f"Skipping file {output_file} as it's already processed.")

                    # Increment the CSV index and reset per-file state
                    csv_index += 1
                    row_count = 0
                    output_file = None

    # Close and load the last CSV file if one is still open (output_file is
    # None when the last file was already handled by the rollover above)
    if output_file is not None:
        csv_file.close()
        # Load data into the database using the same helper
        if output_file not in processed_files:
            copy_csv_to_postgres(output_file, 'bank', cur)
            print(f"Data from {output_file} copied to database.")
            processed_files.add(output_file)
        else:
            print(f"Skipping file {output_file} as it's already processed.")

    # Commit changes and close the database connection
    conn.commit()
    cur.close()
    conn.close()

    print("Data extraction to CSV and insertion into database completed.")


def copy_csv_to_postgres(csv_file_path, table_name, cursor):
    """Loads CSV data into a PostgreSQL table using row-by-row INSERT
    statements (despite the name, the COPY command is not used)."""
    try:
        with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
            # Manually specify column names based on the table schema
            columns = ['Id', 'IC', 'Name', 'Mobile_Number', 'Address1', 'Address2',
                       'State_Name', 'Data_Source_Id', 'Data_Filename', 'WA_Status']

            # Skip the header row
            next(csv_file)

            # Insert each row from the CSV file into the PostgreSQL table
            for line in csv_file:
                # Parse the line with csv.reader so commas within quotes are
                # handled; the reader also strips the surrounding quotes, so no
                # further quote handling is needed before the parameterized
                # INSERT below
                values = next(csv.reader([line]))

                # Replace empty strings with None so they become SQL NULLs
                values = [None if value == '' else value for value in values]

                # Write the values to the database; psycopg2 escapes the %s
                # parameters itself
                cursor.execute(
                    f"INSERT INTO {table_name} ({', '.join(columns)}) "
                    f"VALUES ({', '.join(['%s'] * len(columns))})",
                    values
                )

        print(f"Data from {csv_file_path} copied to {table_name} table.")

    except Exception as e:
        print(f"Failed to insert data into {table_name} table from "
              f"{csv_file_path}. Error: {str(e)}")
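
# For large dumps, a true COPY is much faster than row-by-row INSERTs. Below is
# a minimal sketch using psycopg2's copy_expert; the function name is
# hypothetical, and it assumes the CSV column order matches the target table.
# Empty fields are mapped to NULL, mirroring the loader above.
def copy_csv_with_copy_expert(csv_file_path, table_name, cursor):
    with open(csv_file_path, 'r', newline='', encoding='utf-8') as csv_file:
        # Stream the whole file to the server in one COPY, skipping the header
        cursor.copy_expert(
            f"COPY {table_name} FROM STDIN WITH (FORMAT csv, HEADER true, NULL '')",
            csv_file
        )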

# Example usage:
input_file = r"D:\Backup\Script MobileDB - Data\dbo.Mobile.Table.sql"
output_folder = r"D:\Backup\CSV"
max_rows_per_file = 1048576  # Example maximum rows per file (1,048,576 is Excel's worksheet row limit)
db_params = {
    'dbname': 'mobiledb',
    'user': 'postgres',
    'password': 'supostgres',
    'host': 'localhost',
    'port': '5432'
}
extract_data_to_csv(input_file, output_folder, max_rows_per_file, db_params)
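
# A hypothetical sanity check after the load (reuses db_params and the 'bank'
# table name assumed above):
with psycopg2.connect(**db_params) as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM bank")
        print("Rows loaded:", cur.fetchone()[0])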
