@@ -1,9 +1,7 @@
 import streamlit as st
 import joblib
 import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
+from sklearn.preprocessing import LabelEncoder
 
 # Load the model
 model = joblib.load('model.pkl')
@@ -12,6 +10,26 @@
 features = ['Hobby', 'OpenSource', 'Country', 'Student', 'Employment', 'FormalEducation',
             'UndergradMajor', 'CompanySize', 'DevType', 'YearsCoding', 'YearsCodingProf']
 
+# Initialize label encoders for categorical features
+encoders = {
+    'Hobby': LabelEncoder().fit(['Yes', 'No']),
+    'OpenSource': LabelEncoder().fit(['Yes', 'No']),
+    'Country': LabelEncoder().fit(['United States', 'India', 'Germany']),
+    'Student': LabelEncoder().fit(['Yes', 'No']),
+    'Employment': LabelEncoder().fit(['Employed full-time', 'Employed part-time', 'Self-employed', 'Unemployed']),
+    'FormalEducation': LabelEncoder().fit(["Bachelor’s degree (BA, BS, B.Eng., etc.)",
+                                           "Master’s degree (MA, MS, M.Eng., MBA, etc.)",
+                                           "Doctoral degree (PhD)"]),
+    'UndergradMajor': LabelEncoder().fit(["Computer science, computer engineering, or software engineering",
+                                          "Information technology, networking, or system administration",
+                                          "Other engineering discipline"]),
+    'CompanySize': LabelEncoder().fit(['Fewer than 10 employees', '10 to 19 employees', '20 to 99 employees',
+                                       '100 to 499 employees', '500 to 999 employees', '1,000 to 4,999 employees']),
+    'DevType': LabelEncoder().fit(['Developer, back-end', 'Developer, front-end', 'Developer, full-stack']),
+    'YearsCoding': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
+    'YearsCodingProf': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
+}
+
 st.title('Job Satisfaction Prediction')
 
 # Create a form for user input
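(The form section itself is unchanged by this commit and is collapsed between the hunks above and below. For context only, a typical Streamlit shape for collecting these inputs is sketched here; the `prediction_form` key, the `submit_button` name, and the dict-comprehension layout are assumptions for illustration, not code from the repository.)

```python
# Illustrative sketch only -- not part of the commit. Assumes the `features`
# list and `encoders` dict defined earlier in the app are in scope.
import streamlit as st

with st.form(key='prediction_form'):  # hypothetical form key
    # One selectbox per categorical feature, limited to the encoder's known classes
    input_data = {
        feature: st.selectbox(feature, encoders[feature].classes_.tolist())
        for feature in features
    }
    submit_button = st.form_submit_button(label='Predict')

if submit_button:
    pass  # the DataFrame construction, encoding, and prediction in the next hunk run here
```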
@@ -54,6 +72,10 @@
     # Convert user input to DataFrame
     input_df = pd.DataFrame([input_data])
 
+    # Encode categorical features
+    for feature in features:
+        input_df[feature] = encoders[feature].transform(input_df[feature])
+
     # Ensure the input has the same columns as the training data
     input_df = input_df[features]
 
@@ -62,47 +84,3 @@
 
     # Display the prediction
     st.write(f'Predicted Job Satisfaction: {prediction[0]}')
-
-    # Evaluate the model on test data (assuming y_test and y_pred are available)
-    # This part would typically be done during model development, not in the prediction app
-    # However, for demonstration purposes, we can create some dummy data
-    y_test = [1, 0, 1, 1, 0]  # Example true labels
-    y_pred = model.predict(input_df)  # Example predicted labels
-
-    # Print accuracy
-    accuracy = accuracy_score(y_test, y_pred)
-    st.write(f'Accuracy: {accuracy:.2f}')
-
-    # Print classification report
-    report = classification_report(y_test, y_pred, output_dict=True)
-    st.write('Classification Report:')
-    st.write(report)
-
-    # Convert classification report to a DataFrame for better readability
-    report_df = pd.DataFrame(report).transpose()
-    st.write(report_df)
-
-    # Plot confusion matrix
-    cm = confusion_matrix(y_test, y_pred)
-    plt.figure(figsize=(10, 6))
-    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
-    plt.title('Confusion Matrix')
-    plt.xlabel('Predicted')
-    plt.ylabel('Actual')
-    st.pyplot(plt)
-
-    # If the model is a binary classifier, plot the ROC curve
-    if len(set(y_test)) == 2:
-        fpr, tpr, _ = roc_curve(y_test, y_pred)
-        roc_auc = auc(fpr, tpr)
-
-        plt.figure(figsize=(10, 6))
-        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
-        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-        plt.xlim([0.0, 1.0])
-        plt.ylim([0.0, 1.05])
-        plt.xlabel('False Positive Rate')
-        plt.ylabel('True Positive Rate')
-        plt.title('Receiver Operating Characteristic (ROC) Curve')
-        plt.legend(loc='lower right')
-        st.pyplot(plt)
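A note on the encoding strategy introduced above: `LabelEncoder` sorts the classes it is fitted on, so the integer codes produced by these hand-written lists only line up with what the model saw during training if each list contains exactly the same set of values as the corresponding training column, and `transform` raises a `ValueError` for any value outside that list. A more robust pattern is to fit the encoders once in the training script and persist them next to `model.pkl`. The sketch below is illustrative only: it assumes a training DataFrame `df` and uses a hypothetical `encoders.pkl` file name.

```python
# Training-side sketch (not part of this commit): fit one LabelEncoder per
# categorical column on the full training data and persist the dict with joblib,
# so the Streamlit app can reuse exactly the same category-to-integer mappings.
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder

features = ['Hobby', 'OpenSource', 'Country', 'Student', 'Employment', 'FormalEducation',
            'UndergradMajor', 'CompanySize', 'DevType', 'YearsCoding', 'YearsCodingProf']

def fit_and_save_encoders(df: pd.DataFrame, path: str = 'encoders.pkl') -> dict:
    """Fit a LabelEncoder per feature and save the dict of fitted encoders."""
    encoders = {feature: LabelEncoder().fit(df[feature].astype(str)) for feature in features}
    joblib.dump(encoders, path)
    return encoders
```

In the app, the hard-coded dictionary could then be replaced with `encoders = joblib.load('encoders.pkl')`, and each selectbox can offer `encoders[feature].classes_` as its options so users can only choose values the model has actually seen.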