Python Day 14 (Typed Notes) - Data Extraction Test Cases
# actual data
@pytest.fixture()
def csv_file_path() -> str:
    """Provide the path of the CSV file that holds the actual data.

    The path is relative, so it is resolved against the current working
    directory of the test run.
    """
    # Alternatives noted during the session: 'employee.csv' (file next to the
    # test) or an absolute path such as
    # "C:/Users/hetur/PycharmProjects/ObjectOrientedP/employee.csv".
    actual_csv = '../employee.csv'
    return actual_csv
# expected data
@pytest.fixture()
def expected_csv_data():
    """Build the DataFrame that the CSV extraction is expected to produce."""
    expected_columns = {
        'eno': [1, 2, 3],
        'ename': ['a', 'b', 'c'],
    }
    return pd.DataFrame(expected_columns)
def test_csv_extraction(csv_file_path, expected_csv_data):
    """Check that the CSV file loads into exactly the expected DataFrame.

    Args:
        csv_file_path: Path of the CSV file to read.
        expected_csv_data: DataFrame the file contents must equal.
    """
    extracted = pd.read_csv(csv_file_path)
    is_match = extracted.equals(expected_csv_data)
    assert is_match, "Data extraction failed"
2. Test extraction from JSON and validate against the target (.csv file)
import pandas as pd
import pytest
@pytest.fixture()
def json_file_path() -> str:
    """Provide the relative path of the JSON source file."""
    source_json = "../Test_data/city.json"
    return source_json
@pytest.fixture()
def csv_file_path() -> str:
    """Provide the relative path of the CSV target file."""
    target_csv = "../Test_data/city.csv"
    return target_csv
def test_json_extract_Validation(json_file_path, csv_file_path):
    """Validate that the JSON source and the CSV target hold the same data.

    Both files are loaded into DataFrames and compared with
    ``DataFrame.equals``.

    Args:
        json_file_path: Path of the JSON source file.
        csv_file_path: Path of the CSV target file.
    """
    df_src_json = pd.read_json(json_file_path)
    df_tgt_csv = pd.read_csv(csv_file_path)
    assert df_tgt_csv.equals(df_src_json), (
        "Json extraction failed - Please verify the cause"
    )
import pandas as pd
import pytest
# for connecting mysql database
from sqlalchemy import create_engine
# for connecting oracle database
import cx_Oracle
@pytest.fixture()
def connect_to_mySQL_TGT():
    """Yield an open SQLAlchemy connection to the MySQL target database.

    The connection is closed and the engine's connection pool is disposed
    once the test using this fixture has finished.
    """
    # NOTE(review): credentials are hard-coded in the URL ("%40" is the
    # URL-encoded "@" of the password); consider reading them from the
    # environment or a config file instead.
    engine = create_engine(
        "mysql+pymysql://root:Admin%40143@localhost:3308/etlautomation"
    )
    try:
        # The context manager closes the connection during fixture teardown.
        with engine.connect() as connection_mysql:
            yield connection_mysql
    finally:
        # Release pooled connections so they do not outlive the test.
        engine.dispose()
@pytest.fixture()
def connect_to_oracle_SRC():
    """Yield an open SQLAlchemy connection to the Oracle source database.

    The connection is closed and the engine's connection pool is disposed
    once the test using this fixture has finished.
    """
    # NOTE(review): credentials are hard-coded in the URL; consider reading
    # them from the environment or a config file instead.
    engine = create_engine("oracle+cx_oracle://system:admin@localhost:1521/xe")
    try:
        # The context manager closes the connection during fixture teardown.
        with engine.connect() as connection_oracle:
            yield connection_oracle
    finally:
        # Release pooled connections so they do not outlive the test.
        engine.dispose()
def test_dataExtractionCheckInDatabase(connect_to_mySQL_TGT, connect_to_oracle_SRC):
    """Validate that the CITY table in the MySQL target matches the Oracle source.

    Both tables are read in full and compared with ``DataFrame.equals``
    after their rows have been put into a common order.

    Args:
        connect_to_mySQL_TGT: Open connection to the MySQL target database.
        connect_to_oracle_SRC: Open connection to the Oracle source database.
    """
    query_orcl_src = 'SELECT * FROM CITY'
    query_mysql_tgt = 'SELECT * FROM CITY'
    df_orcl_src = pd.read_sql(query_orcl_src, connect_to_oracle_SRC)
    df_mysql_tgt = pd.read_sql(query_mysql_tgt, connect_to_mySQL_TGT)

    # Neither query has an ORDER BY, so the databases may return the same rows
    # in a different order. Sort on every column and rebuild the index so the
    # comparison checks content rather than retrieval order.
    df_orcl_src = df_orcl_src.sort_values(
        by=list(df_orcl_src.columns)
    ).reset_index(drop=True)
    df_mysql_tgt = df_mysql_tgt.sort_values(
        by=list(df_mysql_tgt.columns)
    ).reset_index(drop=True)

    assert df_mysql_tgt.equals(df_orcl_src), (
        "Data between source and Target is different- please investigate"
    )
Test Cases for Data Quality
1. Check for Missing Values in CSV Data Extraction
import pandas as pd
import pytest
@pytest.fixture()
def csv_file_path() -> str:
    """Provide the relative path of the CSV file used for the data quality check."""
    dq_csv = "../Test_data/DQ_employee.csv"
    return dq_csv
def test_csv_missing_value(csv_file_path):
    """Fail if the target CSV file contains any missing (null) value.

    Args:
        csv_file_path: Path of the CSV file to check.
    """
    df_tgt_csv = pd.read_csv(csv_file_path)
    # The first sum counts nulls per column; the second totals them.
    missing_value = df_tgt_csv.isnull().sum().sum()
    assert missing_value == 0, (
        "Please check why there is missing values in the target file"
    )
Assignments:
1. Create a test script that validates data extracted from a .xml source file against
the target system (.csv file)
2. Create Data Quality (DQ) checks for the target systems:
a) Duplicate records check for a .csv file
b) Data type checks for a MySQL database table