Final Project Implementation and Testing
IMPLEMENTATION
# From the training stage (listing shown only in part): class count and tokenizer persistence
num_classes = len(code_dict)
save_tokenizer(tokenizer, 'tokenizer.pkl')
ICD.py
import numpy as np
from tensorflow.keras.models import load_model # type: ignore
from tensorflow.keras.preprocessing.text import Tokenizer # type: ignore
from tensorflow.keras.preprocessing.sequence import pad_sequences # type: ignore
import pandas as pd
import pickle
def load_tokenizer(filename):
    with open(filename, 'rb') as f:
        tokenizer = pickle.load(f)
    return tokenizer

def load_icd_codes(filename):
    dataset = pd.read_csv(filename)
    icd_codes = dataset['icd_code'].tolist()
    billing_info = dataset['total_cost'].tolist()
    code_dict = {code: billing for code, billing in zip(icd_codes, billing_info)}
    return code_dict
max_sequence_length = 100
# Example usage
def chatbot():
    print("Welcome to the Medical Chatbot!")
    total_billing = 0
    while True:
        # Input fields: the user's symptom text is read here, encoded with the saved
        # tokenizer, padded to max_sequence_length and passed to the loaded model to
        # obtain predicted_icd and its billing amount (code omitted from this listing).
        if predicted_icd:
            print("Predicted ICD code:", predicted_icd)
            print("Billing Information: $", billing)
            # Accumulate billing across the session
            total_billing += billing
        else:
            print("Sorry, I couldn't predict the ICD code for your symptoms.")
import csv
import re
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, _tree
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

training = pd.read_csv('fyp_proj/Training.csv')
testing = pd.read_csv('fyp_proj/Testing.csv')
cols = training.columns[:-1]        # symptom columns; the last column is the prognosis label
x = training[cols]
y = training['prognosis']
y1 = y
reduced_data = training.groupby(training['prognosis']).max()

# Encode the disease labels and split the training data
le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

testx = testing[cols]
testy = le.transform(testing['prognosis'])

clf1 = DecisionTreeClassifier()
clf = clf1.fit(x_train, y_train)
# print(clf.score(x_train, y_train))
# print("cross result========")
scores = cross_val_score(clf, x_test, y_test, cv=3)
# print(scores)
print(scores.mean())

y_pred = clf.predict(x_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# Calculate precision
precision = precision_score(y_test, y_pred, average='weighted')
print("Precision:", precision)
# Calculate recall
recall = recall_score(y_test, y_pred, average='weighted')
print("Recall:", recall)
# Calculate F1 score
f1 = f1_score(y_test, y_pred, average='weighted')
print("F1 Score:", f1)
severityDictionary = dict()
description_list = dict()
precautionDictionary = dict()
symptoms_dict = {}

def getDescription():
    global description_list
    with open('fyp_proj/symptom_Description.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            _description = {row[0]: row[1]}
            description_list.update(_description)

def getSeverityDict():
    global severityDictionary
    with open('fyp_proj/Symptom_severity.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        try:
            for row in csv_reader:
                _diction = {row[0]: int(row[1])}
                severityDictionary.update(_diction)
        except:
            pass

def getprecautionDict():
    global precautionDictionary
    with open('fyp_proj/symptom_precaution.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
            _prec = {row[0]: [row[1], row[2], row[3], row[4]]}
            precautionDictionary.update(_prec)
def getInfo():
    print("-----------------------------------HealthCare ChatBot-----------------------------------")
    print("\nYour Name? \t\t\t\t", end="->")
    name = input("")
    print("Hello, ", name)
def check_pattern(dis_list, inp):
    pred_list = []
    inp = inp.replace(' ', '_')
    patt = f"{inp}"
    regexp = re.compile(patt)
    pred_list = [item for item in dis_list if regexp.search(item)]
    if len(pred_list) > 0:
        return 1, pred_list
    else:
        return 0, []
def sec_predict(symptoms_exp):
    df = pd.read_csv('fyp_proj/Training.csv')
    X = df.iloc[:, :-1]
    y = df['prognosis']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=20)
    rf_clf = DecisionTreeClassifier()
    rf_clf.fit(X_train, y_train)

    # Encode the confirmed symptoms as a binary input vector over the symptom columns
    symptoms_dict = {symptom: index for index, symptom in enumerate(X)}
    input_vector = np.zeros(len(symptoms_dict))
    for item in symptoms_exp:
        input_vector[symptoms_dict[item]] = 1
    return rf_clf.predict([input_vector])
def print_disease(node):
    node = node[0]
    val = node.nonzero()
    disease = le.inverse_transform(val[0])
    return list(map(lambda x: x.strip(), list(disease)))
chk_dis=",".join(feature_names).split(",")
symptoms_present = []
while True:
if conf==1:
print("searches related to input: ")
for num,it in enumerate(cnf_dis):
print(num,")",it)
if num!=0:
print(f"Select the one you meant (0 - {num}): ", end="")
conf_inp = int(input(""))
else:
conf_inp=0
disease_input=cnf_dis[conf_inp]
break
# print("Did you mean: ",cnf_dis,"?(yes/no) :",end="")
# conf_inp = input("")
# if(conf_inp=="yes"):
# break
else:
print("Enter valid symptom.")
while True:
try:
num_days=int(input("Okay. From how many days ? : "))
break
except:
print("Enter valid input.")
    def recurse(node, depth):
        indent = " " * depth
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = feature_name[node]
            threshold = tree_.threshold[node]
            if name == disease_input:
                val = 1
            else:
                val = 0
            if val <= threshold:
                recurse(tree_.children_left[node], depth + 1)
            else:
                symptoms_present.append(name)
                recurse(tree_.children_right[node], depth + 1)
        else:
            present_disease = print_disease(tree_.value[node])
            # print("You may have " + present_disease)
            red_cols = reduced_data.columns
            symptoms_given = red_cols[reduced_data.loc[present_disease].values[0].nonzero()]
            # dis_list = list(symptoms_present)
            # if len(dis_list) != 0:
            #     print("symptoms present " + str(list(symptoms_present)))
            # print("symptoms given " + str(list(symptoms_given)))
            print("Are you experiencing any ")
            symptoms_exp = []
            for syms in list(symptoms_given):
                inp = ""
                print(syms, "? : ", end='')
                while True:
                    inp = input("")
                    if inp == "yes" or inp == "no":
                        break
                    else:
                        print("provide proper answers i.e. (yes/no) : ", end="")
                if inp == "yes":
                    symptoms_exp.append(syms)
            second_prediction = sec_predict(symptoms_exp)
            # print(second_prediction)
            calc_condition(symptoms_exp, num_days)   # severity check (calc_condition defined elsewhere in the project)
            if present_disease[0] == second_prediction[0]:
                print("You may have ", present_disease[0])
                print(description_list[present_disease[0]])
            else:
                print("You may have ", present_disease[0], "or ", second_prediction[0])
                print(description_list[present_disease[0]])
                print(description_list[second_prediction[0]])
            # print(description_list[present_disease[0]])
            precution_list = precautionDictionary[present_disease[0]]
            print("Take following measures : ")
            for i, j in enumerate(precution_list):
                print(i + 1, ")", j)
            # confidence_level = (1.0 * len(symptoms_present)) / len(symptoms_given)
            # print("confidence level is " + str(confidence_level))

    recurse(0, 1)
getSeverityDict()
getDescription()
getprecautionDict()
getInfo()
tree_to_code(clf,cols)
print("Please remember to consult a healthcare professional,this chatbot is only meant
to help you narrow down your spectrum of illnesses and cannot be used as a
replacement consultant to a doctor! ")
print("----------------------------------------------------------------------------------------")
FirstAid.py
import json
import nltk
import numpy as np
import random
import tensorflow as tf
from nltk.stem import LancasterStemmer
from nltk.tokenize import word_tokenize
stemmer = LancasterStemmer()
model = tf.keras.models.load_model('EngChatbotModel.h5')
def bag_of_words(s, words):
    # Build a binary bag-of-words vector for the user's sentence over the vocabulary
    bag = [0 for _ in range(len(words))]
    s_words = nltk.word_tokenize(s)
    s_words = [stemmer.stem(word.lower()) for word in s_words]
    for se in s_words:
        for i, w in enumerate(words):
            if w == se:
                bag[i] = 1
    return [bag]

def get_response(inp):
    # ... intent prediction with the loaded model and lookup of the matching
    # responses list from the intents JSON omitted from this listing ...
    return random.choice(responses)

response = get_response(inp)
print(response)
TESTING
7.1 TESTING APPROACH
To ensure robust performance evaluation and prevent overfitting, we devised a
systematic testing strategy comprising multiple stages. The primary objective was to
validate the effectiveness and generalizability of our ICD recommender system,
leveraging LSTM and Word2Vec models. The testing approach involved the
following key steps:
Firstly, we divided the original dataset into distinct testing and training subsets. The
testing subset represented a collection of unseen medical records, while the training
subset encompassed data used for model learning and parameter optimization. This
division facilitated the evaluation of model performance on unseen instances,
providing insights into its ability to generalize to new medical scenarios.
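A minimal sketch of this division is shown below; the file name, column names, and split ratio are assumptions for illustration, since the report does not fix them.

import pandas as pd
from sklearn.model_selection import train_test_split

# Column names are assumed; the real dataset schema may differ.
data = pd.read_csv('icd_dataset.csv')
texts = data['symptom_text'].astype(str).tolist()
labels = data['icd_code'].tolist()

# Hold out 20% of the records as the unseen testing subset (ratio assumed).
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)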
Subsequently, we trained the LSTM and Word2Vec models using the training dataset,
enabling them to learn the semantic representations of medical terms and relationships
between diseases and symptoms. The LSTM model captured temporal dependencies
in disease progression, while the Word2Vec model learned high-dimensional vector
representations of medical terms based on their contextual usage within the dataset.
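A condensed sketch of how this training stage could be wired together is given below, assuming the split from the previous sketch, gensim for Word2Vec, and illustrative layer sizes and epoch counts.

import numpy as np
from gensim.models import Word2Vec
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_sequence_length = 100   # same constant as in ICD.py
embedding_dim = 100         # vector size assumed

# Word2Vec learns vectors for medical terms from their context in the training texts.
sentences = [text.lower().split() for text in train_texts]
w2v = Word2Vec(sentences, vector_size=embedding_dim, window=5, min_count=1)

# Integer-encode and pad the texts for the LSTM.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_texts)
X_train = pad_sequences(tokenizer.texts_to_sequences(train_texts), maxlen=max_sequence_length)

# Integer-encode the ICD labels.
label_enc = LabelEncoder()
y_train = label_enc.fit_transform(train_labels)
num_classes = len(label_enc.classes_)

# Seed the embedding layer with the learned Word2Vec vectors.
vocab_size = len(tokenizer.word_index) + 1
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, idx in tokenizer.word_index.items():
    if word in w2v.wv:
        embedding_matrix[idx] = w2v.wv[word]

model = Sequential([
    Embedding(vocab_size, embedding_dim, weights=[embedding_matrix],
              input_length=max_sequence_length),
    LSTM(128),
    Dense(num_classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32)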
Following model training, we evaluated the performance of the LSTM and Word2Vec
models using the testing dataset. Performance metrics such as accuracy, precision,
recall, and F1 score were computed to quantify the models' predictive accuracy and
classification capabilities. Additionally, we assessed the models' ability to recommend
appropriate ICD codes for a diverse range of medical scenarios, thereby ensuring their
utility across various clinical contexts.
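Continuing the sketch, these metrics can be computed for the LSTM's predictions on the testing subset as follows; weighted averaging mirrors the decision-tree evaluation earlier in this chapter.

import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Encode the held-out records with the same tokenizer and label encoder.
X_test = pad_sequences(tokenizer.texts_to_sequences(test_texts), maxlen=max_sequence_length)
y_test = label_enc.transform(test_labels)

# Predicted class index compared against the true ICD labels.
y_pred = np.argmax(model.predict(X_test), axis=1)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, y_pred, average='weighted', zero_division=0))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted', zero_division=0))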
To mitigate the risk of overfitting and ensure the reliability of our results, we
employed rigorous cross-validation techniques and utilized multiple independent
testing sets. This approach enabled us to validate the models' generalizability and
effectiveness in real-world healthcare scenarios, minimizing the impact of dataset bias
and variance.
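One such cross-validation pass over the training subset could be sketched as below; build_model() is a hypothetical helper that recreates the LSTM architecture from the earlier sketch with fresh weights, and the fold count is assumed.

import numpy as np
from sklearn.model_selection import StratifiedKFold

# Each fold retrains the model on k-1 parts and scores it on the remaining part.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_scores = []
for train_idx, val_idx in skf.split(X_train, y_train):
    fold_model = build_model()   # hypothetical helper rebuilding the LSTM above
    fold_model.fit(X_train[train_idx], y_train[train_idx], epochs=5, batch_size=32, verbose=0)
    _, acc = fold_model.evaluate(X_train[val_idx], y_train[val_idx], verbose=0)
    fold_scores.append(acc)
print("Mean CV accuracy:", np.mean(fold_scores))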
Finally, based on the evaluation outcomes and feedback from domain experts, iterative refinements were made to the LSTM and Word2Vec models to enhance their predictive performance.
7.1.1 UNIT TESTING
Unit tests focus on the First Aid Chatbot. Across a range of simulated emergency scenarios, the tests verify that it offers relevant and effective first aid advice; a brief illustrative test sketch follows the points below.
Model Integration: The unit tests assess the integration of pre-trained models
within the First Aid Chatbot. This involves validating that the chatbot correctly
utilizes the underlying models to analyze emergency situations and generate
appropriate responses.
Robustness Testing: Unit tests evaluate the robustness of the First Aid Chatbot by
subjecting it to various types of emergency scenarios, including rare or complex
situations. This ensures that the chatbot can handle a wide range of emergency
scenarios effectively.
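An illustrative unit test along these lines is sketched here; it assumes FirstAid.py is importable as a module and that bag_of_words and get_response behave as in the earlier listing.

import unittest
from FirstAid import bag_of_words, get_response   # module import assumed

class TestFirstAidChatbot(unittest.TestCase):
    def test_bag_of_words_marks_known_tokens(self):
        # A vocabulary word present in the sentence should be flagged in the vector.
        vocab = ['burn', 'cut', 'bleed']
        bag = bag_of_words("I have a burn on my hand", vocab)[0]
        self.assertEqual(bag[vocab.index('burn')], 1)

    def test_emergency_query_gets_a_nonempty_response(self):
        # The chatbot should return some first aid advice for an emergency prompt.
        response = get_response("How do I treat a deep cut?")
        self.assertIsInstance(response, str)
        self.assertGreater(len(response), 0)

if __name__ == '__main__':
    unittest.main()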
7.1.3 INTEGRATED TESTING
Integrated testing of our medical assistance system was conducted with rigorous
scenarios and comprehensive validation techniques, yielding strong results
across multiple dimensions. Here is an overview of the outcomes:
Stress Testing:
Stress testing demonstrated the system's robustness and scalability under
high loads and concurrent user interactions. Even under heavy user traffic,
the system remained responsive and resilient, ensuring uninterrupted service.
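Such a load could be simulated roughly as follows; the endpoint URL, payload shape, and request counts are purely illustrative assumptions.

import time
from concurrent.futures import ThreadPoolExecutor
import requests

CHATBOT_URL = "http://localhost:5000/chat"   # hypothetical endpoint

def send_query(_):
    # Time one chatbot request and return its status code and latency.
    start = time.time()
    r = requests.post(CHATBOT_URL, json={"message": "I have a fever and headache"}, timeout=30)
    return r.status_code, time.time() - start

# Fire 200 requests from 50 concurrent workers and summarize errors and latency.
with ThreadPoolExecutor(max_workers=50) as pool:
    results = list(pool.map(send_query, range(200)))

errors = sum(1 for status, _ in results if status != 200)
avg_latency = sum(t for _, t in results) / len(results)
print(f"errors: {errors}/{len(results)}, average latency: {avg_latency:.2f}s")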
Compatibility Testing:
Compatibility testing yielded positive results, demonstrating consistent
functionality across different browsers. The user interface remained accessible
and user-friendly, regardless of the user's choice of browser.
Overall, the integrated testing phase validated the reliability, functionality, and
performance of our medical assistance system, highlighting its effectiveness in
providing users with a seamless and superior healthcare experience.