Python Day 14 (Typed Notes) - Data Extraction Test Cases

ETL Testing Test Cases

Test Cases for Data Extraction


1. Test data extraction from a CSV source and compare it with the expected output.
import pandas as pd
import pytest

# actual data
@pytest.fixture()
def csv_file_path():
    # return 'employee.csv'
    # return "C:/Users/hetur/PycharmProjects/ObjectOrientedP/employee.csv"
    return '../employee.csv'

# expected data
@pytest.fixture()
def expected_csv_data():
    return pd.DataFrame({'eno': [1, 2, 3], 'ename': ['a', 'b', 'c']})

def test_csv_extraction(csv_file_path, expected_csv_data):
    data = pd.read_csv(csv_file_path)
    assert data.equals(expected_csv_data), "Data extraction failed"
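
A small variation worth noting (not part of the original notes): DataFrame.equals() only returns True or False, while pandas.testing.assert_frame_equal reports exactly which column and value differ, which makes a failing extraction test easier to debug. A minimal sketch reusing the fixtures above (the test name is arbitrary):

from pandas.testing import assert_frame_equal

def test_csv_extraction_detailed(csv_file_path, expected_csv_data):
    data = pd.read_csv(csv_file_path)
    # raises an AssertionError naming the first mismatching column/value
    assert_frame_equal(data, expected_csv_data)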

2. Test extraction from a JSON source and validate it against the target (.csv file).
import pandas as pd
import pytest

@pytest.fixture()
def json_file_path():
    return "../Test_data/city.json"

@pytest.fixture()
def csv_file_path():
    return "../Test_data/city.csv"

def test_json_extract_Validation(json_file_path, csv_file_path):
    df_src_json = pd.read_json(json_file_path)
    df_tgt_csv = pd.read_csv(csv_file_path)
    assert df_tgt_csv.equals(df_src_json), "Json extraction failed - Please verify the cause"

3. Test extraction validation with Oracle as the source and MySQL as the target.

import pandas as pd
import pytest
# for connecting to the MySQL database
from sqlalchemy import create_engine
# for connecting to the Oracle database
import cx_Oracle

@pytest.fixture()
def connect_to_mySQL_TGT():
    engine = create_engine("mysql+pymysql://root:Admin%40143@localhost:3308/etlautomation")
    connection_mysql = engine.connect()
    yield connection_mysql
    connection_mysql.close()

@pytest.fixture()
def connect_to_oracle_SRC():
    engine = create_engine("oracle+cx_oracle://system:admin@localhost:1521/xe")
    connection_oracle = engine.connect()
    yield connection_oracle
    connection_oracle.close()

def test_dataExtractionCheckInDatabase(connect_to_mySQL_TGT, connect_to_oracle_SRC):
    query_orcl_src = 'SELECT * FROM CITY'
    query_mysql_tgt = 'SELECT * FROM CITY'

    df_orcl_src = pd.read_sql(query_orcl_src, connect_to_oracle_SRC)
    df_mysql_tgt = pd.read_sql(query_mysql_tgt, connect_to_mySQL_TGT)
    assert df_mysql_tgt.equals(df_orcl_src), "Data between source and target is different - please investigate"
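
Since neither query uses ORDER BY, the two databases are free to return rows in different orders, and DataFrame.equals() would then fail even when the data matches. One possible order-tolerant variant (an addition to the notes; the column-name case normalization is an assumption, since Oracle typically returns upper-case names while MySQL often returns lower-case) is sketched below:

def test_dataExtractionOrderInsensitive(connect_to_mySQL_TGT, connect_to_oracle_SRC):
    df_orcl_src = pd.read_sql('SELECT * FROM CITY', connect_to_oracle_SRC)
    df_mysql_tgt = pd.read_sql('SELECT * FROM CITY', connect_to_mySQL_TGT)
    # normalize column-name case, then sort rows into a deterministic order
    df_orcl_src.columns = [c.lower() for c in df_orcl_src.columns]
    df_mysql_tgt.columns = [c.lower() for c in df_mysql_tgt.columns]
    cols = sorted(df_orcl_src.columns)
    df_orcl_src = df_orcl_src[cols].sort_values(by=cols).reset_index(drop=True)
    df_mysql_tgt = df_mysql_tgt[cols].sort_values(by=cols).reset_index(drop=True)
    assert df_mysql_tgt.equals(df_orcl_src), "Data between source and target is different - please investigate"
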
Test Cases for Data Quality
1. Check for Missing Values in CSV Data Extraction

import pandas as pd
import pytest

@pytest.fixture()
def csv_file_path():
    return "../Test_data/DQ_employee.csv"

def test_csv_missing_value(csv_file_path):
    df_tgt_csv = pd.read_csv(csv_file_path)
    missing_value = df_tgt_csv.isnull().sum().sum()
    assert missing_value == 0, "Please check why there are missing values in the target file"
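
When this test fails, it only says that some value is missing somewhere. A per-column breakdown (same fixture, illustrative only and not from the notes) makes the failure message point at the offending columns:

def test_csv_missing_value_per_column(csv_file_path):
    df_tgt_csv = pd.read_csv(csv_file_path)
    # count missing values per column so the failure message shows where they are
    missing_per_column = df_tgt_csv.isnull().sum()
    bad_columns = missing_per_column[missing_per_column > 0]
    assert bad_columns.empty, f"Missing values found in columns: {bad_columns.to_dict()}"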

Assignments:
1. Create a test script for data validation with an .xml file as the source, validated against the target system (.csv file).
2. Create data quality (DQ) checks for the target systems:
a) duplicate records check for a .csv file
b) data type checks for a MySQL database table
(A possible starting sketch for these assignments follows below.)
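
One possible starting point for the assignments (illustrative only: the file paths, table name, and expected column dtypes below are assumptions, and pd.read_xml requires pandas 1.3 or newer):

import pandas as pd
from sqlalchemy import create_engine

# Assignment 1: XML source validated against a CSV target (paths are placeholders)
def test_xml_extract_validation():
    df_src_xml = pd.read_xml("../Test_data/city.xml")
    df_tgt_csv = pd.read_csv("../Test_data/city.csv")
    assert df_tgt_csv.equals(df_src_xml), "XML extraction failed - please verify the cause"

# Assignment 2a: duplicate records check for a .csv target
def test_csv_duplicate_records():
    df = pd.read_csv("../Test_data/DQ_employee.csv")
    duplicate_count = df.duplicated().sum()
    assert duplicate_count == 0, f"{duplicate_count} duplicate record(s) found in the target file"

# Assignment 2b: data type checks for a MySQL table (expected dtypes are placeholders)
def test_mysql_column_datatypes():
    engine = create_engine("mysql+pymysql://root:Admin%40143@localhost:3308/etlautomation")
    with engine.connect() as connection:
        df = pd.read_sql("SELECT * FROM CITY", connection)
    expected_dtypes = {"cid": "int64", "cname": "object"}
    for column, expected in expected_dtypes.items():
        assert str(df[column].dtype) == expected, f"Column {column} has dtype {df[column].dtype}, expected {expected}"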
