1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly .express as px
5
+ import matplotlib .pyplot as plt
6
+ import seaborn as sns
7
+ from statsmodels .tsa .arima .model import ARIMA
8
+ import datetime
9
+ import plotly .graph_objects as go
10
+ import networkx as nx
11
+
12
+
13
# Load the question counts; st.cache_data keeps the parsed frame between reruns.
@st.cache_data
def load_data():
    """Read the question counts CSV and index it by month.

    The file ``TotalQuestions.csv`` is read from the current working
    directory. Its ``Month`` column is parsed as dates and becomes the index;
    all remaining columns are returned unchanged.

    The decorator must be applied with ``@``: calling ``st.cache_data(...)``
    as a bare statement builds a decorator and discards it, leaving this
    function uncached. ``hash_funcs`` is not passed because this function
    takes no arguments that would need hashing.

    Returns:
        pandas.DataFrame: the CSV contents indexed by ``Month``.
    """
    df = pd.read_csv('TotalQuestions.csv', parse_dates=['Month'])
    return df.set_index('Month')
19
+
20
+
21
# Sidebar navigation: the chosen entry decides which page the script renders.
_PAGE_NAMES = [
    'Stack Overflow Question Forecast',
    'Graphical Analysis',
    'Timeline Visualization',
]
menu = st.sidebar.selectbox('Navigation', _PAGE_NAMES)

if menu == 'Stack Overflow Question Forecast':
    # The forecast page needs the data and the list of selectable columns.
    df = load_data()
    languages = list(df.columns)
28
+
29
+
30
def forecast_questions(df, language, future_month, future_year):
    """Forecast one column's value for a single future month.

    The horizon is the number of calendar months between the last index
    entry of ``df`` and the requested month. An ARIMA(5, 1, 0) model is
    fitted on ``df[language]`` and forecast that many steps ahead; the last
    step is returned.

    Args:
        df: DataFrame whose index holds dates; ``df.index[-1]`` is treated
            as the last observed month (assumes the index is sorted
            ascending with one row per month -- confirm against the data).
        language: Name of the column to model.
        future_month: Target month number, 1-12.
        future_year: Target calendar year.

    Returns:
        The forecast value for the target month.

    Raises:
        ValueError: If the target month is not after the last observed
            month (or if month/year do not form a valid date).
    """
    last_date = df.index[-1]
    # Build the date directly instead of formatting and re-parsing a string.
    future_date = pd.Timestamp(year=int(future_year), month=int(future_month), day=1)
    months_ahead = (
        (future_date.year - last_date.year) * 12
        + future_date.month - last_date.month
    )
    # Validate first: fitting the model is the expensive step, so an invalid
    # request should be rejected before any fitting happens.
    if months_ahead <= 0:
        raise ValueError("Prediction must have end after start.")

    # Simple ARIMA model for demonstration.
    model_fit = ARIMA(df[language], order=(5, 1, 0)).fit()
    forecast = model_fit.forecast(steps=months_ahead)
    # The last forecast step corresponds to the requested month.
    return forecast.iloc[-1]
40
+
41
+
42
def generate_forecasts(df, language, start_date, periods):
    """Forecast ``periods`` consecutive months for one column.

    Fits an ARIMA(5, 1, 0) model on ``df[language]`` and forecasts
    ``periods`` steps ahead.

    Args:
        df: DataFrame containing the column to model.
        language: Name of the column to model.
        start_date: Start of the date range used to label the forecasts.
        periods: Number of forecast steps (and of rows returned).

    Returns:
        pandas.DataFrame: one column named ``language`` holding the
        forecasts, indexed by ``periods`` month-end dates generated from
        ``start_date``.
    """
    model_fit = ARIMA(df[language], order=(5, 1, 0)).fit()
    forecast = model_fit.forecast(steps=periods)
    # MonthEnd() is the offset the 'M' alias stands for; passing the object
    # avoids the alias, which newer pandas versions deprecate.
    future_dates = pd.date_range(
        start=start_date, periods=periods, freq=pd.offsets.MonthEnd()
    )
    # Use the raw values: handing the forecast Series to DataFrame together
    # with a different index would realign it by label and yield NaN wherever
    # the forecast's own labels differ from the month-end dates.
    return pd.DataFrame({language: np.asarray(forecast)}, index=future_dates)
49
+
50
+
51
# Forecast page. Rendered only for the forecast entry of the sidebar menu; it
# reads `menu`, `df` and `languages` prepared earlier in the script.
if menu == 'Stack Overflow Question Forecast':

    def _monthly_outlook(history, through_year):
        """Return observed values followed by forecasts, one per month.

        Fits a single ARIMA(5, 1, 0) model (the same order used by
        ``forecast_questions``) and forecasts from the month after the last
        observation through December of ``through_year``. The result is a
        Series indexed by monthly periods, so callers can look up any month
        without refitting the model.
        """
        last_period = history.index[-1].to_period('M')
        observed = pd.Series(history.to_numpy(), index=history.index.to_period('M'))
        steps = (through_year - last_period.year) * 12 + 12 - last_period.month
        if steps <= 0:
            # The data already covers the whole requested range.
            return observed
        fitted = ARIMA(history, order=(5, 1, 0)).fit()
        future_index = pd.period_range(start=last_period + 1, periods=steps, freq='M')
        predicted = pd.Series(np.asarray(fitted.forecast(steps=steps)), index=future_index)
        return pd.concat([observed, predicted])

    # Page title with custom styling.
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Stack Overflow Question Forecast</h1>",
        unsafe_allow_html=True,
    )
    st.markdown("---", unsafe_allow_html=True)

    # The subheaders act as the visible captions, so the widget labels are
    # given real text (Streamlit discourages empty labels) but kept collapsed.
    st.subheader('Select Programming Language')
    selected_language = st.selectbox(
        'Programming language', languages, label_visibility='collapsed'
    )

    current_year = datetime.datetime.now().year
    col1, col2 = st.columns(2)
    with col1:
        st.subheader('Select Future Month')
        future_month = st.selectbox(
            'Future month',
            list(range(1, 13)),
            format_func=lambda x: datetime.date(1900, x, 1).strftime('%B'),
            label_visibility='collapsed',
        )
    with col2:
        st.subheader('Select Future Year')
        future_year = st.selectbox(
            'Future year',
            list(range(current_year, current_year + 6)),
            label_visibility='collapsed',
        )

    # Forecast for the selected month and year.
    if st.button('Predict'):
        try:
            # Raises ValueError when the chosen month is not in the future.
            prediction = forecast_questions(df, selected_language, future_month, future_year)
            month_name = datetime.date(1900, future_month, 1).strftime('%B')
            st.markdown(
                f"<div style='background-color: green; color: white; padding: 10px; border-radius: 5px;'><strong>Predicted number of questions for {selected_language} in {month_name} {future_year}: <span style='color: red;'>{int(prediction)}</span></strong></div>",
                unsafe_allow_html=True,
            )

            # One model fit covers every month the charts below need. Months
            # that are already in the data use the observed value, so a
            # selected year that is partly observed no longer aborts the
            # charts with a ValueError.
            outlook = _monthly_outlook(df[selected_language], future_year + 4)

            # Plot 1: value for each month of the selected year.
            months = pd.date_range(
                start=f'{future_year}-01-01', periods=12, freq=pd.offsets.MonthEnd()
            )
            month_forecast_df = pd.DataFrame(
                {selected_language: outlook.reindex(months.to_period('M')).to_numpy()},
                index=months,
            )
            fig1 = px.bar(
                month_forecast_df,
                x=month_forecast_df.index.strftime('%B'),
                y=selected_language,
                title=f'Monthly Predictions for {future_year}',
            )
            st.plotly_chart(fig1)

            # Plot 2: yearly totals from the current year through four years
            # after the selected one. A partly observed year adds the
            # forecasts for its remaining months to the observed months.
            future_years = list(range(current_year, future_year + 5))
            year_forecasts = [
                outlook[outlook.index.year == year].sum() for year in future_years
            ]
            year_forecast_df = pd.DataFrame(
                {selected_language: year_forecasts}, index=future_years
            )
            fig2 = px.bar(
                year_forecast_df,
                x=year_forecast_df.index,
                y=selected_language,
                title=f'Yearly Predictions for Next 5 Years for {selected_language}',
            )
            st.plotly_chart(fig2)

            # Plot 3: share of the selected year's total contributed by each month.
            year_forecast_percent = month_forecast_df / month_forecast_df.sum() * 100
            fig3 = px.pie(
                year_forecast_percent,
                values=selected_language,
                names=year_forecast_percent.index.strftime('%B'),
                title=f'Percentage Question Distribution for {selected_language} in {future_year}',
            )
            st.plotly_chart(fig3)

            # Plot 4: the same monthly values as a line chart.
            fig4 = px.line(
                month_forecast_df,
                x=month_forecast_df.index,
                y=selected_language,
                title=f'Monthly Trends for {selected_language}',
            )
            fig4.update_traces(mode='lines+markers')
            fig4.update_layout(
                xaxis_title='Date',
                yaxis_title='Number of Questions',
                plot_bgcolor='rgba(0, 0, 0, 0)',
            )
            st.plotly_chart(fig4)

        except ValueError as e:
            st.error(f"Error: {e}")
122
+
123
# Graphical analysis page. `menu` holds exactly one page name, so an
# independent `if` selects the same page as the original `elif` branch.
if menu == 'Graphical Analysis':

    # Page title with custom styling.
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Graphical Analysis</h1>",
        unsafe_allow_html=True,
    )
    st.markdown("---", unsafe_allow_html=True)

    df = load_data()

    # Column totals, largest first; reused by several charts below.
    total_questions_by_language = df.sum().sort_values(ascending=False)

    # Yearly totals per column. YearEnd() is the offset behind the 'A' alias,
    # which newer pandas versions deprecate.
    df_annual = df.resample(pd.offsets.YearEnd()).sum()
    fig1 = px.line(df_annual, x=df_annual.index, y=df_annual.columns,
                   title='Timeline of the number of questions per category (2008-2024)')
    st.plotly_chart(fig1)

    # Row-to-row change of every column.
    df_change = df.diff()
    fig2 = px.line(df_change, x=df_change.index, y=df_change.columns,
                   title='Change in Question Counts for Each Programming Language Over Time')
    st.plotly_chart(fig2)

    # Overall total per column.
    fig4 = px.bar(x=total_questions_by_language.index, y=total_questions_by_language.values,
                  title='Total Number of Questions by Programming Languages')
    st.plotly_chart(fig4)

    # Time series of the five columns with the largest totals.
    top_5_languages = total_questions_by_language.head(5).index.tolist()
    df_top_5 = df[top_5_languages]
    fig5 = px.line(df_top_5, x=df_top_5.index, y=df_top_5.columns,
                   title='Individual Temporal Series for Top 5 Languages')
    st.plotly_chart(fig5)

    # Totals grouped by the weekday of each row's index date.
    # NOTE(review): the index comes from a 'Month' column, so the weekday is
    # presumably that of each month's date stamp -- confirm this chart is
    # meaningful for the data.
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    # Reindex to all seven weekdays so x and y always have the same length,
    # even when some weekday never occurs in the index.
    daily_total_questions = (
        df.groupby(df.index.dayofweek).sum().sum(axis=1).reindex(range(7), fill_value=0)
    )
    fig6 = px.bar(x=weekday_names, y=daily_total_questions.values,
                  title='Total Number of Questions by Day of the Week')
    st.plotly_chart(fig6)

    # Heatmap of pairwise correlations between columns ('thermal' is a Plotly
    # colorscale name).
    correlation_matrix = df.corr()
    fig7 = px.imshow(correlation_matrix, color_continuous_scale='thermal',
                     title='Correlation Heatmap of Programming Languages')
    st.plotly_chart(fig7)

    # Distribution of the ten columns with the largest totals.
    top_10_languages = total_questions_by_language.head(10).index.tolist()
    df_top_10 = df[top_10_languages]
    fig8 = px.box(df_top_10, y=df_top_10.columns,
                  title='Distribution of Questions for Top 10 Programming Languages')
    st.plotly_chart(fig8)

    # Network built from the top-10 correlation matrix: node i stands for
    # top_10_languages[i], and every non-zero matrix entry becomes an edge.
    corr_matrix = df_top_10.corr()
    G = nx.from_numpy_array(corr_matrix.values)

    # Apply the dark style only while this figure is built and rendered, so
    # it takes effect before the figure is created and does not leak into
    # the process-wide matplotlib settings.
    with plt.style.context('dark_background'):
        fig_network, ax = plt.subplots(figsize=(12, 8))
        pos = nx.spring_layout(G, seed=42)  # fixed seed keeps the layout stable
        nx.draw_networkx_nodes(G, pos, ax=ax, node_size=1500, node_color='skyblue',
                               edgecolors='grey')
        nx.draw_networkx_edges(G, pos, ax=ax, edge_color='grey')
        nx.draw_networkx_labels(G, pos, ax=ax, labels=dict(enumerate(top_10_languages)),
                                font_size=10, font_weight='bold')
        ax.set_title('Network Plot of Top 10 Programming Languages based on Correlation')
        # Hand Streamlit the figure itself; plt.show() has no place in a
        # Streamlit script, and passing the pyplot module relies on global state.
        st.pyplot(fig_network)
    # Release the figure so reruns do not accumulate open figures.
    plt.close(fig_network)
218
+
219
+
220
# Timeline page. `menu` holds exactly one page name, so a single independent
# `if` selects the same page as the original `elif` plus its redundant inner
# `if` on the same condition.
if menu == 'Timeline Visualization':
    # Page title with custom styling.
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Timeline Visualization</h1>",
        unsafe_allow_html=True,
    )
    st.markdown("---", unsafe_allow_html=True)

    # JavaScript intended to start playback automatically.
    # NOTE(review): nothing in this block creates an element with the id
    # 'autoplay-video', and script tags injected through st.markdown are
    # presumably not executed -- verify whether this snippet has any effect.
    autoplay_js = (
        "\n"
        "<script>\n"
        "document.addEventListener('DOMContentLoaded', function(event) {\n"
        "var video = document.getElementById('autoplay-video');\n"
        "video.play();\n"
        "});\n"
        "</script>\n"
    )
    st.markdown(autoplay_js, unsafe_allow_html=True)

    # Read the MP4 and hand its bytes to the video widget. The context
    # manager closes the file handle even if reading fails.
    video_path = 'stack_overflow.mp4'  # Replace with your actual video file path
    with open(video_path, 'rb') as video_file:
        video_bytes = video_file.read()
    st.video(video_bytes, start_time=0)
0 commit comments