Skip to content

Commit a1433fd

Browse files
committed
Stock market prediction using gradient boosting
1 parent e49ece9 commit a1433fd

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# <h1> Problem Statement: Stock Market Analysis and Prediction
5+
#
6+
# Explanation: Our aim is to create software that analyses previous stock data of certain companies,
7+
# with the help of certain parameters that affect stock value. We are going to feed these values into data mining algorithms.
8+
# This will also help us to determine the values that a particular stock will have in the near future.
9+
# We will determine the month's high and low with the help of data mining algorithms.
10+
# In this project we are going to take five years of stock data for our analysis and prediction.
11+
12+
13+
# Install the dependencies: pip install quandl
14+
import quandl
15+
import numpy as np
16+
#plotly.offline.init_notebook_mode(connected=True)
17+
import plotly.offline as py
18+
from sklearn.model_selection import train_test_split
19+
from plotly.offline import iplot, init_notebook_mode
20+
init_notebook_mode()
21+
from sklearn.ensemble import GradientBoostingRegressor
22+
from sklearn.metrics import r2_score, mean_squared_error
23+
import matplotlib.pyplot as plt
24+
25+
26+
# Get the stock data for Microsoft from Quandl's "WIKI" database.
# NOTE(review): this call needs network access, and presumably a configured
# Quandl API key for anything beyond occasional anonymous use -- confirm
# before running unattended.
df = quandl.get("WIKI/MSFT")
# Take a look at the data
print(df.head())


# Quick visual check: plot the "High" column against the "Low" column.
import plotly.express as px
fig = px.scatter(df, x="High", y="Low")
fig.show()


# Get the Adjusted Close Price.
# The explicit .copy() makes df an independent frame, so adding a column to
# it later does not raise pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
# Take a look at the new data
print(df.head())
41+
42+
43+
44+
# A variable for predicting 'n' days out into the future
forecast_out = 30  # 'n=30' days
# Create another column (the target) holding the adjusted close shifted 'n'
# rows up, so every row is paired with the price 'n' rows later. The last
# 'n' rows have nothing to pair with and are filled with NaN.
df['Prediction'] = df['Adj. Close'].shift(-forecast_out)
# Print the new data set
print(df.tail())


### Create the independent data set (X) #####
# Convert every column except the target to a numpy array. `columns=` is used
# instead of a positional axis argument, which pandas 2.0 no longer accepts.
X = np.array(df.drop(columns=['Prediction']))
# Remove the last '30' rows: their target value is NaN
X = X[:-forecast_out]
print(X)


### Create the dependent data set (y) #####
# Convert the target column to a numpy array
y = np.array(df['Prediction'])
# Get all of the y values except the last '30' rows (the NaN targets)
y = y[:-forecast_out]
print(y)
67+
68+
69+
# Hold out 20% of the samples for testing. The fixed seed makes the split,
# and therefore every score and plot derived from it, reproducible.
# NOTE(review): the split is shuffled, so test rows are interleaved in time
# with training rows -- consider a chronological split for a stricter check.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# Hyper-parameters for the gradient boosting model. The loss is left at the
# estimator's default (least squares): the old 'ls' alias was removed in
# scikit-learn 1.2, while the default means the same thing in every release.
params = {
    'learning_rate': 0.1,
    'n_estimators': 500,
    'min_samples_split': 2,
    'min_weight_fraction_leaf': 0.0,
    'max_depth': 3,
}
model = GradientBoostingRegressor(**params)
model.fit(x_train, y_train)

# Report the R^2 score on both splits. The built-in round() is used because
# the score may be a plain Python float, which has no .round() method, and
# the values are printed so they are visible when run as a script.
print('Training R^2 score: ', round(model.score(x_train, y_train), 3))
print('Test R^2 score: ', round(model.score(x_test, y_test), 3))

y_pred = model.predict(x_test)
print('The mean squared error is: ', mean_squared_error(y_test, y_pred))
# r2_score is the coefficient of determination, not a variance.
print('The R^2 score is: ', r2_score(y_test, y_pred))
90+
91+
# So let's run the model against the test data
92+
from sklearn.model_selection import cross_val_predict
93+
94+
# Scatter the predictions against the true test values. The dashed diagonal
# spans the range of the true values; points lying on it are exact predictions.
lowest, highest = y_test.min(), y_test.max()
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
ax.plot([lowest, highest], [lowest, highest], 'k--', lw=4)
ax.set(xlabel='Actual', ylabel='Predicted', title="Ground Truth vs Predicted")
plt.show()
101+
# Deviance is a goodness-of-fit statistic for a statistical model; it is often
# used for statistical hypothesis testing. It generalizes the sum of squared
# residuals in ordinary least squares to cases where model fitting is achieved
# by maximum likelihood. For this least-squares model it is the mean squared
# error, so it is computed with mean_squared_error rather than the estimator's
# `loss_` attribute, which scikit-learn 1.3 removed.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
# staged_predict yields the test-set prediction after each boosting stage.
# A separate loop variable keeps the final `y_pred` from being overwritten.
for i, staged_pred in enumerate(model.staged_predict(x_test)):
    test_score[i] = mean_squared_error(y_test, staged_pred)

# 1-based boosting iteration numbers shared by both curves.
iterations = np.arange(params['n_estimators']) + 1

fig = plt.figure(figsize=(10, 6))
plt.subplot(1, 1, 1)
plt.title('Deviance')
plt.plot(iterations, model.train_score_, 'b-',
         label='Training Set Deviance')
plt.plot(iterations, test_score, 'r-',
         label='Test Set Deviance')
plt.legend(loc='upper right')
plt.xlabel('Boosting Iterations')
plt.ylabel('Deviance')
fig.tight_layout()
plt.show()
119+
120+
121+
122+
123+

0 commit comments

Comments
 (0)