Skip to content

Commit d77a3be

Browse files
committed
Predict Stock Prices Python & Machine Learning
1 parent 607e36d commit d77a3be

File tree

1 file changed

+4
-38
lines changed

1 file changed

+4
-38
lines changed

machine_learning/stock-market-forecast-using-gradient-boosting-regressor.py

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,41 @@
1-
#!/usr/bin/env python
2-
# coding: utf-8
3-
4-
# <h1> Problem Statement: Stock Market Analysis and Prediction
5-
#
6-
# Explanation: Our aim is to create software that analyses previous stock data of certain companies,
7-
# with help of certain parameters that affect stock value. We are going to implement these values in data mining algorithms.
8-
# This will also help us to determine the values that particular stock will have in near future.
9-
# We will determine the Month’s High and Low with help of data mining algorithms.
10-
# In this project we are going to take five years of stock data for our analysis and prediction.
11-
12-
1+
"""Predict the adjusted close price of Microsoft (MSFT) stock."""
132
# Install the dependencies first: pip install quandl
143
import quandl
154
import numpy as np
16-
#plotly.offline.init_notebook_mode(connected=True)
17-
import plotly.offline as py
185
from sklearn.model_selection import train_test_split
19-
from plotly.offline import iplot, init_notebook_mode
20-
init_notebook_mode()
216
from sklearn.ensemble import GradientBoostingRegressor
227
from sklearn.metrics import r2_score, mean_squared_error
238
import matplotlib.pyplot as plt
24-
25-
269
# Get the stock data
2710
df = quandl.get("WIKI/MSFT")
2811
# Take a look at the data
2912
print(df.head())
30-
31-
3213
import plotly.express as px
3314
fig = px.scatter(df, x="High", y="Low")
3415
fig.show()
35-
36-
3716
# Get the Adjusted Close Price
3817
df = df[['Adj. Close']]
3918
# Take a look at the new data
4019
print(df.head())
41-
42-
43-
4420
# A variable for predicting 'n' days out into the future
4521
forecast_out = 30 #'n=30' days
4622
# Create another column (the target) shifted 'n' units up
4723
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
4824
#print the new data set
4925
print(df.tail())
50-
51-
5226
# Convert the dataframe to a numpy array
5327
# Feature matrix: every column except the target. Use the `columns=` keyword,
# because passing the axis positionally (`drop(labels, 1)`) was deprecated in
# pandas 1.3 and raises TypeError in pandas 2.0+.
X = np.array(df.drop(columns=['Prediction']))
54-
5528
#Remove the last '30' rows
5629
X = X[:-forecast_out]
5730
print(X)
58-
59-
60-
6131
### Create the dependent data set (y) #####
6232
# Convert the dataframe to a numpy array
6333
y = np.array(df['Prediction'])
6434
# Get all of the y values except the last '30' rows
6535
y = y[:-forecast_out]
6636
print(y)
67-
68-
6937
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
70-
71-
72-
73-
38+
# These are the parameters that we are giving to the gradient boosting regressor
7439
params = {
7540
'loss':'ls',
7641
'learning_rate':0.1,
@@ -98,7 +63,8 @@
9863
ax.set_ylabel('Predicted')
9964
ax.set_title("Ground Truth vs Predicted")
10065
plt.show()
101-
# deviance is a goodness-of-fit statistic for a statistical model; it is often used for statistical hypothesis testing. It is a generalization of the idea of using the sum of squares
66+
# deviance is a goodness-of-fit statistic for a statistical model; it is often used for statistical hypothesis testing.
67+
#It is a generalization of the idea of using the sum of squares
10268
#of residuals in ordinary least squares to cases where model-fitting is achieved by maximum likelihood.
10369
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
10470
for i, y_pred in enumerate(model.staged_predict(x_test)):

0 commit comments

Comments
 (0)