Skip to content

Commit 07cc62a

Browse files
authored
Update app.py
1 parent 40c6531 commit 07cc62a

File tree

1 file changed

+25
-47
lines changed
  • Job Satisfaction Analysis

1 file changed

+25
-47
lines changed

Job Satisfaction Analysis/app.py

Lines changed: 25 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import streamlit as st
22
import joblib
33
import pandas as pd
4-
import matplotlib.pyplot as plt
5-
import seaborn as sns
6-
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
4+
from sklearn.preprocessing import LabelEncoder
75

86
# Load the model
97
model = joblib.load('model.pkl')
@@ -12,6 +10,26 @@
1210
features = ['Hobby', 'OpenSource', 'Country', 'Student', 'Employment', 'FormalEducation',
1311
'UndergradMajor', 'CompanySize', 'DevType', 'YearsCoding', 'YearsCodingProf']
1412

13+
# Fit one label encoder per categorical feature on a hard-coded list of accepted values
14+
encoders = {
15+
'Hobby': LabelEncoder().fit(['Yes', 'No']),
16+
'OpenSource': LabelEncoder().fit(['Yes', 'No']),
17+
'Country': LabelEncoder().fit(['United States', 'India', 'Germany']),
18+
'Student': LabelEncoder().fit(['Yes', 'No']),
19+
'Employment': LabelEncoder().fit(['Employed full-time', 'Employed part-time', 'Self-employed', 'Unemployed']),
20+
'FormalEducation': LabelEncoder().fit(["Bachelor’s degree (BA, BS, B.Eng., etc.)",
21+
"Master’s degree (MA, MS, M.Eng., MBA, etc.)",
22+
"Doctoral degree (PhD)"]),
23+
'UndergradMajor': LabelEncoder().fit(["Computer science, computer engineering, or software engineering",
24+
"Information technology, networking, or system administration",
25+
"Other engineering discipline"]),
26+
'CompanySize': LabelEncoder().fit(['Fewer than 10 employees', '10 to 19 employees', '20 to 99 employees',
27+
'100 to 499 employees', '500 to 999 employees', '1,000 to 4,999 employees']),
28+
'DevType': LabelEncoder().fit(['Developer, back-end', 'Developer, front-end', 'Developer, full-stack']),
29+
'YearsCoding': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
30+
'YearsCodingProf': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
31+
}
32+
1533
st.title('Job Satisfaction Prediction')
1634

1735
# Create a form for user input
@@ -54,6 +72,10 @@
5472
# Convert user input to DataFrame
5573
input_df = pd.DataFrame([input_data])
5674

75+
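# Encode each categorical feature with its pre-fitted encoder (transform raises ValueError for a value the encoder was not fitted on)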
76+
for feature in features:
77+
input_df[feature] = encoders[feature].transform(input_df[feature])
78+
5779
# Ensure the input has the same columns as the training data
5880
input_df = input_df[features]
5981

@@ -62,47 +84,3 @@
6284

6385
# Display the prediction
6486
st.write(f'Predicted Job Satisfaction: {prediction[0]}')
65-
66-
# Evaluate the model on test data (assuming y_test and y_pred are available)
67-
# This part would typically be done during model development, not in the prediction app
68-
# However, for demonstration purposes, we can create some dummy data
69-
y_test = [1, 0, 1, 1, 0] # Example true labels
70-
y_pred = model.predict(input_df) # Example predicted labels
71-
72-
# Print accuracy
73-
accuracy = accuracy_score(y_test, y_pred)
74-
st.write(f'Accuracy: {accuracy:.2f}')
75-
76-
# Print classification report
77-
report = classification_report(y_test, y_pred, output_dict=True)
78-
st.write('Classification Report:')
79-
st.write(report)
80-
81-
# Convert classification report to a DataFrame for better readability
82-
report_df = pd.DataFrame(report).transpose()
83-
st.write(report_df)
84-
85-
# Plot confusion matrix
86-
cm = confusion_matrix(y_test, y_pred)
87-
plt.figure(figsize=(10, 6))
88-
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
89-
plt.title('Confusion Matrix')
90-
plt.xlabel('Predicted')
91-
plt.ylabel('Actual')
92-
st.pyplot(plt)
93-
94-
# If the true labels contain exactly two distinct classes, plot the ROC curve
95-
if len(set(y_test)) == 2:
96-
fpr, tpr, _ = roc_curve(y_test, y_pred)
97-
roc_auc = auc(fpr, tpr)
98-
99-
plt.figure(figsize=(10, 6))
100-
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
101-
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
102-
plt.xlim([0.0, 1.0])
103-
plt.ylim([0.0, 1.05])
104-
plt.xlabel('False Positive Rate')
105-
plt.ylabel('True Positive Rate')
106-
plt.title('Receiver Operating Characteristic (ROC) Curve')
107-
plt.legend(loc='lower right')
108-
st.pyplot(plt)

0 commit comments

Comments
 (0)