Skip to content

Commit 2b15386

Browse files
committed
Gradient boosting regressor on boston dataset
1 parent d77a3be commit 2b15386

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# In[17]:
5+
6+
7+
"""Implementation of GradientBoostingRegressor in sklearn using the
8+
Boston housing dataset, a popular benchmark for regression problems, to
9+
predict house prices.
10+
"""
11+
import pandas as pd
12+
import matplotlib.pyplot as plt
13+
from sklearn.datasets import load_boston
14+
from sklearn.metrics import mean_squared_error,r2_score
15+
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
16+
from sklearn.model_selection import train_test_split
17+
18+
def main():
    """Train and evaluate a gradient boosting regressor on the Boston data.

    Loads the dataset with ``load_boston``, builds a DataFrame holding the
    features plus a ``Price`` target column, splits it 75/25 into train and
    test sets, fits a ``GradientBoostingRegressor``, prints the train/test
    scores together with the test mean squared error and R^2, and finally
    shows a scatter plot of actual versus predicted prices.

    Returns:
        None. Every result is printed or plotted.

    NOTE(review): ``load_boston`` was removed in scikit-learn 1.2, so this
    script needs an older scikit-learn release to run -- confirm the
    version this project pins.
    """
    # Load the dataset bundled with scikit-learn and list what it contains.
    boston = load_boston()
    print(boston.keys())

    # Build a DataFrame from the feature matrix, then append the target.
    df_boston = pd.DataFrame(boston.data, columns=boston.feature_names)
    df_boston['Price'] = boston.target

    # Quick look at the data: first five rows and summary statistics.
    print(df_boston.head())
    print(df_boston.describe().T)

    # Independent variables are every column except the last one; the
    # dependent variable ("Price") is the last column.
    X = df_boston.iloc[:, :-1]
    y = df_boston.iloc[:, -1]

    # Hold out 25% of the rows for testing; the fixed seed keeps the split
    # reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=0, test_size=.25)

    # Model hyper-parameters. The loss is deliberately left unset: least
    # squares is the estimator's default, and the old 'ls' alias was
    # deprecated in scikit-learn 1.0 and removed in 1.2.
    params = {
        'n_estimators': 500,
        'max_depth': 5,
        'min_samples_split': 4,
        'learning_rate': 0.01,
    }
    model = GradientBoostingRegressor(**params)

    # Train the model.
    model.fit(X_train, y_train)

    # Compare train and test scores to see how well the model fits the data.
    # The built-in round() is used instead of the .round() method because it
    # works whether score() returns a NumPy scalar or a plain Python float.
    score = round(model.score(X_train, y_train), 3)
    print("Training score of GradientBoosting is :", score)
    print("the test score of GradienBoosting is :",
          round(model.score(X_test, y_test), 3))

    # Evaluate the model on the held-out test set.
    y_pred = model.predict(X_test)

    # Mean squared error: lower is better.
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # R^2 (coefficient of determination): 1 is a perfect prediction.
    print('Test Variance score: %.2f' % r2_score(y_test, y_pred))

    # Plot predictions against the true values; points lying on the dashed
    # diagonal are predicted exactly.
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
    ax.plot([y_test.min(), y_test.max()],
            [y_test.min(), y_test.max()], 'k--', lw=4)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    ax.set_title("Truth vs Predicted")
    # Display the figure.
    plt.show()
61+
62+
63+
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
65+
66+
67+
# In[ ]:
68+
69+
70+
71+

0 commit comments

Comments
 (0)