|
2 | 2 | boston dataset which is very popular for regression problem to
|
3 | 3 | predict house price.
|
4 | 4 | """
|
| 5 | + |
5 | 6 | import pandas as pd
|
6 | 7 | import matplotlib.pyplot as plt
|
7 | 8 | from sklearn.datasets import load_boston
|
8 |
| -from sklearn.metrics import mean_squared_error,r2_score |
9 |
| -from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor |
| 9 | +from sklearn.metrics import mean_squared_error, r2_score |
| 10 | +from sklearn.ensemble import GradientBoostingRegressor |
10 | 11 | from sklearn.model_selection import train_test_split
|
11 | 12 |
|
| 13 | + |
12 | 14 | def main():
|
| 15 | + |
13 | 16 | # loading the dataset from the sklearn package
|
14 | 17 | df = load_boston()
|
15 | 18 | print(df.keys())
|
16 | 19 | # now let's construct a data frame with the data and target variables
|
17 |
| - df_boston = pd.DataFrame(df.data,columns =df.feature_names) |
| 20 | + df_boston = pd.DataFrame(df.data, columns=df.feature_names) |
18 | 21 | # let's add the target to the dataframe
|
19 |
| - df_boston['Price']= df.target |
| 22 | + df_boston["Price"] = df.target |
20 | 23 | # let us print the first five rows using the head function
|
21 | 24 | print(df_boston.head())
|
22 |
| - print(df_boston.describe().T) # to see summary statistics of the dataset |
| 25 | + print(df_boston.describe().T) # to see summary statistics of the dataset |
23 | 26 | # Feature selection: separate the independent variables (X) from the dependent variable (y)
|
24 |
| - X = df_boston.iloc[:,:-1] |
25 |
| - y = df_boston.iloc[:,-1] # target variable |
| 27 | + |
| 28 | + X = df_boston.iloc[:, :-1] |
| 29 | + y = df_boston.iloc[:, -1] # target variable |
26 | 30 | # we are going to split the data with 75% train and 25% test sets.
|
27 |
| - X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 0, test_size = .25) |
28 |
| - # now let set the parameters of the model |
29 |
| - params = {'n_estimators': 500, 'max_depth': 5, 'min_samples_split': 4, |
30 |
| - 'learning_rate': 0.01, 'loss': 'ls'} |
| 31 | + X_train, X_test, y_train, y_test = train_test_split( |
| 32 | + X, y, random_state=0, test_size=0.25 |
| 33 | + ) |
| 34 | + # model parameter |
| 35 | + params = { |
| 36 | + "n_estimators": 500, |
| 37 | + "max_depth": 5, |
| 38 | + "min_samples_split": 4, |
| 39 | + "learning_rate": 0.01, |
| 40 | + "loss": "ls", |
| 41 | + } |
31 | 42 | model = GradientBoostingRegressor(**params)
|
32 | 43 | # training the model
|
33 |
| - model.fit(X_train,y_train) |
34 |
| - """ let have a look on the train and test score to see how good the model fit the data""" |
35 |
| - score = model.score(X_train,y_train).round(3) |
36 |
| - print("Training score of GradientBoosting is :",score) |
37 |
| - print("the test score of GradienBoosting is :",model.score(X_test,y_test).round(3)) |
38 |
| - # Let us evaluation the model by finding the errors |
| 44 | + model.fit(X_train, y_train) |
| 45 | + """let have a look on the train and test score to see how good the model fit the data""" |
| 46 | + score = model.score(X_train, y_train).round(3) |
| 47 | + print("Training score of GradientBoosting is :", score) |
| 48 | + print( |
| 49 | + "the test score of GradienBoosting is :", model.score(X_test, y_test).round(3) |
| 50 | + ) |
| 51 | + # Let us evaluation the model by finding the errors |
39 | 52 | y_pred = model.predict(X_test)
|
40 | 53 |
|
41 | 54 | # The mean squared error
|
42 |
| - print("Mean squared error: %.2f"% mean_squared_error(y_test, y_pred)) |
| 55 | + print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred)) |
43 | 56 | # R^2 score (coefficient of determination): 1 is perfect prediction
|
44 |
| - print('Test Variance score: %.2f' % r2_score(y_test, y_pred)) |
45 |
| - |
| 57 | + print("Test Variance score: %.2f" % r2_score(y_test, y_pred)) |
| 58 | + |
46 | 59 | # So let's plot the predicted values against the actual test values
|
47 | 60 | fig, ax = plt.subplots()
|
48 | 61 | ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
|
49 |
| - ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4) |
50 |
| - ax.set_xlabel('Actual') |
51 |
| - ax.set_ylabel('Predicted') |
| 62 | + ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=4) |
| 63 | + ax.set_xlabel("Actual") |
| 64 | + ax.set_ylabel("Predicted") |
52 | 65 | ax.set_title("Truth vs Predicted")
|
53 |
| - # this show function will display the plotting |
| 66 | + # this show function will display the plotting |
54 | 67 | plt.show()
|
55 |
| - |
56 |
| - |
57 |
| -if __name__ =='__main__': |
58 |
| - main() |
59 |
| - |
60 |
| - |
61 |
| -# In[ ]: |
62 |
| - |
63 |
| - |
64 | 68 |
|
65 | 69 |
|
| 70 | +if __name__ == "__main__": |
| 71 | + main() |
0 commit comments