trying to implement my diseases prediction model into streamlit webapp, because i am new in streamlit so idk how it works
Also i am new in machine learning too but not as new as streamlit.
main error might be in calling of the functions and making prediction in streamlit webapp code file where one error says – ‘DecisionTreeClassifier’ object has no attribute ‘features”
# Import Dependencies
import csv
import pandas as pd
import numpy as np
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
disease_list = []
def return_list(disease):
disease_list = []
match = disease.replace('^','_').split('_') # using _ as common splitting delimeter
ctr = 1
for group in match:
if ctr%2==0:
disease_list.append(group) # refer the data format
ctr = ctr + 1
return disease_list
with open("C:/Users/sahil/OneDrive/Desktop/CODE/Disease-Prediction-from-Symptoms-main/Datasets/raw_data_2.csv") as csvfile:
reader = csv.reader(csvfile)
disease=""
weight = 0
disease_list = []
dict_wt = {}
dict_=defaultdict(list)
for row in reader:
if row[0]!="xc2xa0" and row[0]!="": # for handling file encoding errors
# saving disease and frequency
disease = row[0]
disease_list = return_list(disease)
weight = row[1]
if row[2]!="xc2xa0" and row[2]!="":
symptom_list = return_list(row[2])
for d in disease_list:
for s in symptom_list:
dict_[d].append(s) # adding all symptoms
dict_wt[d] = weight
# saving cleaned data
with open("dataset_clean.csv","w") as csvfile:
writer = csv.writer(csvfile)
for key,values in dict_.items():
for v in values:
#key = str.encode(key)
key = str.encode(key).decode('utf-8')
#.strip()
#v = v.encode('utf-8').strip()
#v = str.encode(v)
writer.writerow([key,v,dict_wt[key]])
columns = ['Source','Target','Weight'] # source: disease, target: symptom, weight: number of cases
data = pd.read_csv("dataset_clean.csv",names=columns, encoding ="ISO-8859-1")
data.head()
data.to_csv("dataset_clean.csv",index=False)
data = pd.read_csv("dataset_clean.csv", encoding ="ISO-8859-1")
data.head()
len(data['Source'].unique()) # unique diseases
len(data['Target'].unique()) # unique symptoms
df = pd.DataFrame(data)
df_1 = pd.get_dummies(df.Target) # 1 hot encoding symptoms
df_1.head()
df.head()
df_s = df['Source']
df_pivoted = pd.concat([df_s,df_1], axis=1)
df_pivoted.drop_duplicates(keep='first',inplace=True)
df_pivoted[:5]
len(df_pivoted)
cols = df_pivoted.columns
cols = cols[1:] # removing heading
# visualizing existance of symptoms for diseases
df_pivoted = df_pivoted.groupby('Source').sum()
df_pivoted = df_pivoted.reset_index()
df_pivoted[:]
len(df_pivoted)
df_pivoted.to_csv("df_pivoted.csv")
# defining data for training
x = df_pivoted[cols]
y = df_pivoted['Source']
# importing all needed libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
# performing train test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
# Training multinomial naive bayes
mnb = MultinomialNB()
mnb = mnb.fit(x_train, y_train)
mnb.score(x_test, y_test)
mnb_tot = MultinomialNB()
mnb_tot = mnb_tot.fit(x, y)
mnb_tot.score(x, y)
disease_pred = mnb_tot.predict(x)
disease_real = y.values
# printing model error
for i in range(0, len(disease_real)):
if disease_pred[i]!=disease_real[i]:
print ('Pred: {0} Actual:{1}'.format(disease_pred[i], disease_real[i]))
from sklearn.tree import DecisionTreeClassifier, export_graphviz
print ("DecisionTree")
dt = DecisionTreeClassifier()
clf_dt=dt.fit(x,y)
print ("Acurracy: ", clf_dt.score(x,y))
from sklearn import tree
from sklearn.tree import export_graphviz
export_graphviz(dt,
out_file='tree.jpg',
feature_names=cols
)
from IPython.display import Image
Image(filename='tree.jpg')
data = pd.read_csv("C:/Users/sahil/OneDrive/Desktop/CODE/Disease-Prediction-from-Symptoms-main/Datasets/Training.csv")
data.head()
data.columns
len(data.columns)
len(data['prognosis'].unique())
df = pd.DataFrame(data)
df.head()
len(df)
cols = df.columns
cols = cols[:-1]
len(cols)
x = df[cols]
y = df['prognosis']
x
y
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
mnb = MultinomialNB()
mnb = mnb.fit(x_train, y_train)
mnb.score(x_test, y_test)
from sklearn import model_selection
print ("cross result========")
scores = model_selection.cross_val_score(mnb, x_test, y_test, cv=3)
print (scores)
print (scores.mean())
test_data = pd.read_csv("C:/Users/sahil/OneDrive/Desktop/CODE/Disease-Prediction-from-Symptoms-main/Datasets/Testing.csv")
test_data.head()
testx = test_data[cols]
testy = test_data['prognosis']
mnb.score(testx, testy)
from sklearn import model_selection
print ("cross result========")
scores = model_selection.cross_val_score(mnb, x_test, y_test, cv=3)
print (scores)
print (scores.mean())
from sklearn.tree import DecisionTreeClassifier, export_graphviz
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
print ("DecisionTree")
dt = DecisionTreeClassifier(min_samples_split=20)
clf_dt=dt.fit(x_train,y_train)
print ("Acurracy: ", clf_dt.score(x_test,y_test))
from sklearn import model_selection
print ("cross result========")
scores = model_selection.cross_val_score(dt, x_test, y_test, cv=3)
print (scores)
print (scores.mean())
print ("Acurracy on the actual test data: ", clf_dt.score(testx,testy))
from sklearn import tree
from sklearn.tree import export_graphviz
export_graphviz(dt,
out_file='tree.dot',
feature_names=cols)
!dot -Tpng tree.dot -o tree.png
from IPython.display import Image
Image(filename='C:/Users/sahil/OneDrive/Desktop/CODE/Disease-Prediction-from-Symptoms-main/tree.jpg')
dt.__getstate__()
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
importances = dt.feature_importances_
print(dt.feature_importances_)
indices = np.argsort(importances)[::-1]
print([data.columns[i] for i in indices])
features = cols
for f in range(20):
print("%d. feature %d - %s (%f)" % (f + 1, indices[f], features[indices[f]] ,importances[indices[f]]))
export_graphviz(dt,
out_file='tree-top5.dot',
feature_names=cols,
max_depth = 5
)
from IPython.display import Image
Image(filename='C:/Users/sahil/OneDrive/Desktop/CODE/Disease-Prediction-from-Symptoms-main/tree-top5.png')
feature_dict = {}
for i,f in enumerate(features):
feature_dict[f] = i
feature_dict['internal_itching']
sample_x = [i/52 if i==52 else i*0 for i in range(len(features))]
cols = list(data.columns)
print(cols.index('skin_rash'))
sample_x = np.array(sample_x).reshape(1,len(sample_x))
dt.predict(sample_x)
dt.predict_proba(sample_x)
len(sample_x)
symptoms = ['skin_rash','itching','nodal_skin_eruptions','increased_appetite','irritability']
ipt = [0 for i in range(len(features))]
for s in symptoms:
ipt[cols.index(s)]=1
ipt = np.array([ipt])
print(ipt)
print(dt.predict(ipt))
dt.predict_proba(ipt)
This is my common Diseases ML model with decision tree and Naves bayes
This model works fine and gives accurate prediction
import pickle
import streamlit as st
from streamlit_option_menu import option_menu
import numpy as np
st.set_page_config(layout='wide')
Common_model = pickle.load(open('saved modeldecision_tree_model.sav','rb'))
with st.sidebar:
selected = option_menu('Multiple Disease Prediction System',
['Diabetes Prediction',
'Heart Disease Prediction',
'Parkinsons Prediction',
'Common diseases Prediction'],
icons=['activity','heart','person','thermometer'],
default_index=3)
if (selected == 'Common diseases Prediction'):
st.title('Common Disease Prediction using ML')
col1, col2, col3, col4, col5 = st.columns(5)
with col1:
symptom1 = st.text_input('Symptom 1')
with col2:
symptom2 = st.text_input('Symptom 2')
with col2:
symptom3 = st.text_input('Symptom 3')
with col2:
symptom4 = st.text_input('Symptom 4')
with col2:
symptom5 = st.text_input('Symptom 5')
common_diagnostics = ''
if st.button('Common Disease Prediction'):
Symptoms = [symptom1, symptom2, symptom3, symptom4, symptom5]
features = Common_model.features()
ipt = [0 for i in range(len(features))]
for s in Symptoms:
ipt[Common_model.cols.index(s)]=1
ipt = np.array([ipt])
pred = Common_model.dt.predict(ipt)
if (Symptoms == True):
common_diagnostics = ("Person has - " + pred)
else:
common_diagnostics = ("Persons diseases connot be classified by model, Please provide appropriate symptoms.")
st.success(common_diagnostics)
This is streamlit webapp that i am trying to implement