|
1 |
| -#!/usr/bin/env python |
2 |
| -# coding: utf-8 |
3 |
| - |
4 |
| -# <h1> Problem Statement: Stock Market Analysis and Prediction |
5 |
| -# |
6 |
| -# Explanation: Our aim is to create software that analyses previous stock data of certain companies, |
7 |
| -# with help of certain parameters that affect stock value. We are going to implement these values in data mining algorithms. |
8 |
| -# This will also help us to determine the values that particular stock will have in near future. |
9 |
| -# We will determine the Month’s High and Low with help of data mining algorithms. |
10 |
| -# In this project we are going to take a five years of stock data for our analysis and prediction |
11 |
| - |
12 |
| - |
"""Predict the adjusted close price of Microsoft (MSFT) stock from five years of historical data."""
13 | 2 | #Install the dependencies pip install quandl
|
14 | 3 | import quandl
|
15 | 4 | import numpy as np
|
16 |
| -#plotly.offline.init_notebook_mode(connected=True) |
17 |
| -import plotly.offline as py |
18 | 5 | from sklearn.model_selection import train_test_split
|
19 |
| -from plotly.offline import iplot, init_notebook_mode |
20 |
| -init_notebook_mode() |
21 | 6 | from sklearn.ensemble import GradientBoostingRegressor
|
22 | 7 | from sklearn.metrics import r2_score, mean_squared_error
|
23 | 8 | import matplotlib.pyplot as plt
|
24 |
| - |
25 |
| - |
# Pull the full MSFT price history from Quandl's WIKI dataset and
# peek at the first rows to confirm the columns we expect.
df = quandl.get("WIKI/MSFT")
print(df.head())

# Quick sanity-check visual: daily High vs. Low should be tightly
# correlated; an interactive plotly scatter makes outliers obvious.
import plotly.express as px
fig = px.scatter(df, x="High", y="Low")
fig.show()
|
35 |
| - |
36 |
| - |
# Keep only the adjusted close column -- that is the series we model.
df = df[['Adj. Close']]
print(df.head())

# Forecast horizon: each row's label is the adjusted close
# `forecast_out` trading days in the future, obtained by shifting the
# column upward.  The final `forecast_out` rows end up with NaN labels.
forecast_out = 30
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
print(df.tail())
|
50 |
| - |
51 |
| - |
### Create the independent data set (X) ###
# Convert the feature frame to a numpy array.
# FIX: the original `df.drop(['Prediction'], 1)` relied on the positional
# `axis` argument, which was deprecated in pandas 1.0 and removed in
# pandas 2.0 -- the `columns=` keyword is the supported spelling.
X = np.array(df.drop(columns=['Prediction']))
# Drop the last `forecast_out` rows: their shifted labels are NaN.
X = X[:-forecast_out]
print(X)

### Create the dependent data set (y) ###
# Convert the label column to a numpy array.
y = np.array(df['Prediction'])
# Drop the same trailing rows so X and y stay aligned row-for-row.
y = y[:-forecast_out]
print(y)
|
67 |
| - |
68 |
| - |
# Randomly hold out 20% of the samples for evaluation; the remaining
# 80% are used for training.  NOTE(review): no random_state is set, so
# the split -- and therefore the reported scores -- differ on every run.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
70 |
| - |
71 |
| - |
72 |
| - |
73 |
| - |
# Hyper-parameters passed to the GradientBoostingRegressor.
# NOTE: loss='ls' was renamed to 'squared_error' in scikit-learn 1.0.
74 | 39 | params = {
|
75 | 40 | 'loss':'ls',
|
76 | 41 | 'learning_rate':0.1,
|
|
# Label and show the ground-truth vs. prediction scatter plot
# (the figure and `ax` are created just above this chunk).
ax.set_ylabel('Predicted')
ax.set_title("Ground Truth vs Predicted")
plt.show()

# Deviance is a goodness-of-fit statistic for a statistical model, often
# used for hypothesis testing.  It generalises the residual sum of
# squares from ordinary least squares to models fitted by maximum
# likelihood; here it tracks the test-set loss at each boosting stage.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
|
104 | 70 | for i, y_pred in enumerate(model.staged_predict(x_test)):
|
|
0 commit comments