
Commit c5e2e09

Gradient boosting regressor
1 parent 54827b6 commit c5e2e09

File tree

1 file changed (+39, -33 lines)


machine_learning/Gradient-boosting-regressor.py

Lines changed: 39 additions & 33 deletions
@@ -2,64 +2,70 @@
 boston dataset which is very popular for regression problem to
 predict house price.
 """
+
 import pandas as pd
 import matplotlib.pyplot as plt
 from sklearn.datasets import load_boston
-from sklearn.metrics import mean_squared_error,r2_score
-from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.model_selection import train_test_split

+
 def main():
+
     # loading the dataset from the sklearn package
     df = load_boston()
     print(df.keys())
     # now let construct a data frame with data and target variables
-    df_boston = pd.DataFrame(df.data,columns =df.feature_names)
+    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
     # let add the target to the dataframe
-    df_boston['Price']= df.target
+    df_boston["Price"] = df.target
     # let us print the first five rows using the head function
     print(df_boston.head())
-    print(df_boston.describe().T) # to see summary statistics of the dataset
+    print(df_boston.describe().T)  # to see summary statistics of the dataset
     # Feature selection means for independent and dependent variables
-    X = df_boston.iloc[:,:-1]
-    y = df_boston.iloc[:,-1] # target variable
+
+    X = df_boston.iloc[:, :-1]
+    y = df_boston.iloc[:, -1]  # target variable
     # we are going to split the data with 75% train and 25% test sets.
-    X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 0, test_size = .25)
-    # now let set the parameters of the model
-    params = {'n_estimators': 500, 'max_depth': 5, 'min_samples_split': 4,
-              'learning_rate': 0.01, 'loss': 'ls'}
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, random_state=0, test_size=0.25
+    )
+    # model parameter
+    params = {
+        "n_estimators": 500,
+        "max_depth": 5,
+        "min_samples_split": 4,
+        "learning_rate": 0.01,
+        "loss": "ls",
+    }
     model = GradientBoostingRegressor(**params)
     # training the model
-    model.fit(X_train,y_train)
-    """ let have a look on the train and test score to see how good the model fit the data"""
-    score = model.score(X_train,y_train).round(3)
-    print("Training score of GradientBoosting is :",score)
-    print("the test score of GradienBoosting is :",model.score(X_test,y_test).round(3))
-    # Let us evaluation the model by finding the errors
+    model.fit(X_train, y_train)
+    """let have a look on the train and test score to see how good the model fit the data"""
+    score = model.score(X_train, y_train).round(3)
+    print("Training score of GradientBoosting is :", score)
+    print(
+        "the test score of GradienBoosting is :", model.score(X_test, y_test).round(3)
+    )
+    # Let us evaluation the model by finding the errors
     y_pred = model.predict(X_test)

     # The mean squared error
-    print("Mean squared error: %.2f"% mean_squared_error(y_test, y_pred))
+    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
     # Explained variance score: 1 is perfect prediction
-    print('Test Variance score: %.2f' % r2_score(y_test, y_pred))
-
+    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
+
     # So let's run the model against the test data
     fig, ax = plt.subplots()
     ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
-    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
-    ax.set_xlabel('Actual')
-    ax.set_ylabel('Predicted')
+    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=4)
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
     ax.set_title("Truth vs Predicted")
-    # this show function will display the plotting
+    # this show function will display the plotting
     plt.show()
-
-
-if __name__ =='__main__':
-    main()
-
-
-# In[ ]:
-
-


+if __name__ == "__main__":
+    main()
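
Note (not part of the commit): the reformatted script still depends on load_boston and the "ls" loss alias, both of which were removed in scikit-learn 1.2 ("ls" is now spelled "squared_error"). The sketch below is a hedged illustration of the same train / evaluate flow on a current scikit-learn, with load_diabetes standing in for the Boston housing data; the hyperparameters mirror the params dict in the diff, and the substitutions are mine, not the author's.

# A hedged sketch, not the committed script: load_diabetes replaces the removed
# load_boston, and "squared_error" replaces the removed "ls" loss alias.
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


def main():
    # build a DataFrame with the features plus a target column, as in the commit
    bunch = load_diabetes()
    df = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    df["Target"] = bunch.target

    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=0, test_size=0.25
    )

    # same hyperparameters as the committed params dict, with the renamed loss
    params = {
        "n_estimators": 500,
        "max_depth": 5,
        "min_samples_split": 4,
        "learning_rate": 0.01,
        "loss": "squared_error",
    }
    model = GradientBoostingRegressor(**params)
    model.fit(X_train, y_train)

    print("Training score:", round(model.score(X_train, y_train), 3))
    print("Test score:", round(model.score(X_test, y_test), 3))
    y_pred = model.predict(X_test)
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))


if __name__ == "__main__":
    main()

The scores will differ from any Boston-housing run because the dataset is different; the point is only that the refactored structure (split, params dict, fit, score, MSE/R2) runs unchanged.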
