Skip to content

Commit 011dcfa

Browse files
authored
Add files via upload
1 parent 53832bc commit 011dcfa

File tree

6 files changed

+459
-0
lines changed

6 files changed

+459
-0
lines changed
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
import streamlit as st
2+
import pandas as pd
3+
import numpy as np
4+
import plotly.express as px
5+
import matplotlib.pyplot as plt
6+
import seaborn as sns
7+
from statsmodels.tsa.arima.model import ARIMA
8+
import datetime
9+
import plotly.graph_objects as go
10+
import networkx as nx
11+
12+
13+
# Load and preprocess data, cached by Streamlit across script reruns
@st.cache_data
def load_data():
    """Load the question-count dataset from ``TotalQuestions.csv``.

    The ``Month`` column is parsed as dates and becomes the index; every
    remaining column is kept as-is (the app treats each one as a language).

    ``st.cache_data`` must be applied as a decorator: calling it as a bare
    statement builds a decorator and throws it away, so the CSV would be
    re-read on every rerun of the script.

    Returns:
        pandas.DataFrame indexed by ``Month`` in ascending order.
    """
    df = pd.read_csv('TotalQuestions.csv', parse_dates=['Month'])
    df.set_index('Month', inplace=True)
    # The forecasting code reads df.index[-1] as "the most recent month",
    # so guarantee chronological order regardless of the row order on disk.
    df.sort_index(inplace=True)
    return df
19+
20+
21+
# Sidebar navigation: one entry per page of the app.
page_names = [
    'Stack Overflow Question Forecast',
    'Graphical Analysis',
    'Timeline Visualization',
]
menu = st.sidebar.selectbox('Navigation', page_names)

if menu == 'Stack Overflow Question Forecast':
    # The forecast page needs the dataset and one selectable entry per column.
    df = load_data()
    languages = list(df.columns)
28+
29+
30+
def forecast_questions(df, language, future_month, future_year):
    """Forecast the question count of one language for a future month.

    Fits an ARIMA(5, 1, 0) model on ``df[language]`` and forecasts from the
    last timestamp in ``df.index`` up to the requested month. The horizon is
    counted in calendar months, so one row per month is assumed.

    Args:
        df: DataFrame with a datetime index and one column per language.
        language: Name of the column to forecast.
        future_month: Target month number (1-12).
        future_year: Target calendar year.

    Returns:
        The forecast value for the target month (the last forecast step).

    Raises:
        ValueError: If the target month is not strictly after the last
            month present in ``df``.
    """
    last_date = df.index[-1]
    future_date = pd.to_datetime(f'{future_year}-{future_month:02d}-01')
    months_ahead = (future_date.year - last_date.year) * 12 + future_date.month - last_date.month

    # Validate before fitting: the fit is the expensive step and is wasted
    # work for a request that is going to be rejected anyway.
    if months_ahead <= 0:
        raise ValueError("Prediction must have end after start.")

    model = ARIMA(df[language], order=(5, 1, 0))  # Simple ARIMA model for demonstration
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=months_ahead)
    return forecast.iloc[-1]  # value at the requested month
40+
41+
42+
def generate_forecasts(df, language, start_date, periods):
    """Forecast ``periods`` steps ahead and label them with month-end dates.

    Args:
        df: DataFrame with one column per language.
        language: Name of the column to forecast.
        start_date: First date of the forecast window; the generated labels
            are the month-end dates on or after it.
        periods: Number of steps (months) to forecast.

    Returns:
        pandas.DataFrame with a single column named ``language`` holding the
        forecast values, indexed by ``periods`` consecutive month-end dates.
    """
    model = ARIMA(df[language], order=(5, 1, 0))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=periods)

    # An offset object instead of the 'M' alias: same month-end dates, but
    # not affected by the alias deprecation in newer pandas releases.
    future_dates = pd.date_range(start=start_date, periods=periods, freq=pd.offsets.MonthEnd())

    # Hand over the raw values. The forecast comes back with its own index,
    # and a dict of Series passed together with ``index=`` is re-aligned to
    # that index, which left every cell NaN because the labels never match.
    forecast_df = pd.DataFrame({language: np.asarray(forecast)}, index=future_dates)
    return forecast_df
49+
50+
51+
if menu == 'Stack Overflow Question Forecast':

    def _year_by_month(history, forecast_path, year):
        """Return the 12 monthly values of ``year`` for one language.

        Months already present in ``history`` use the observed value; later
        months use the matching step of ``forecast_path`` (step 1 is the
        month right after the last observation).
        """
        last = history.index[-1]
        values = []
        for month in range(1, 13):
            steps_ahead = (year - last.year) * 12 + month - last.month
            if steps_ahead > 0:
                values.append(float(forecast_path[steps_ahead - 1]))
            else:
                in_month = (history.index.year == year) & (history.index.month == month)
                values.append(float(history[in_month].sum()))
        return values

    # Modify title style
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Stack Overflow Question Forecast</h1>",
        unsafe_allow_html=True
    )
    st.markdown("---", unsafe_allow_html=True)

    # Each selectbox gets a real but hidden label: the subheader above it is
    # the visible caption, and a non-empty label keeps the widget accessible.
    st.subheader('Select Programming Language')
    selected_language = st.selectbox('Programming language', languages,
                                     label_visibility='collapsed')

    current_year = datetime.datetime.now().year
    col1, col2 = st.columns(2)
    with col1:
        st.subheader('Select Future Month')
        future_month = st.selectbox('Future month', list(range(1, 13)),
                                    format_func=lambda x: datetime.date(1900, x, 1).strftime('%B'),
                                    label_visibility='collapsed')
    with col2:
        st.subheader('Select Future Year')
        future_year = st.selectbox('Future year', list(range(current_year, current_year + 6)),
                                   label_visibility='collapsed')

    # Forecast for the selected month and year
    if st.button('Predict'):
        try:
            # Raises ValueError when the target month is not in the future.
            prediction = forecast_questions(df, selected_language, future_month, future_year)
            st.markdown(
                f"<div style='background-color: green; color: white; padding: 10px; border-radius: 5px;'><strong>Predicted number of questions for {selected_language} in {datetime.date(1900, future_month, 1).strftime('%B')} {future_year}: <span style='color: red;'>{int(prediction)}</span></strong></div>",
                unsafe_allow_html=True)

            # Fit once and forecast up to December of the last plotted year.
            # Step k of an ARIMA forecast does not depend on how many steps
            # are requested, so slicing this single path gives the same
            # numbers as refitting the model for every month, at a fraction
            # of the cost. Keep the order in sync with forecast_questions.
            history = df[selected_language]
            last_date = history.index[-1]
            last_plot_year = future_year + 4
            horizon = (last_plot_year - last_date.year) * 12 + (12 - last_date.month)
            fitted = ARIMA(history, order=(5, 1, 0)).fit()
            forecast_path = np.asarray(fitted.forecast(steps=horizon), dtype=float)

            # Plot 1: total questions for each month of the selected year.
            # Months that are already observed show the actual count instead
            # of failing with "Prediction must have end after start."
            months = pd.date_range(start=f'{future_year}-01-01', periods=12, freq='MS')
            month_forecast_df = pd.DataFrame(
                {selected_language: _year_by_month(history, forecast_path, future_year)},
                index=months)

            fig1 = px.bar(month_forecast_df, x=month_forecast_df.index.strftime('%B'), y=selected_language,
                          title=f'Monthly Predictions for {future_year}')
            st.plotly_chart(fig1)

            # Plot 2: yearly totals from the current year through four years
            # past the selected one. A partially observed year is completed
            # with forecast months so every bar covers twelve months.
            future_years = list(range(current_year, future_year + 5))
            year_forecasts = [sum(_year_by_month(history, forecast_path, year))
                              for year in future_years]
            year_forecast_df = pd.DataFrame({selected_language: year_forecasts}, index=future_years)

            fig2 = px.bar(year_forecast_df, x=year_forecast_df.index, y=selected_language,
                          title=f'Yearly Predictions for Next 5 Years for {selected_language}')
            st.plotly_chart(fig2)

            # Plot 3: share of the selected year's questions per month
            year_forecast_percent = month_forecast_df / month_forecast_df.sum() * 100
            fig3 = px.pie(year_forecast_percent, values=selected_language,
                          names=year_forecast_percent.index.strftime('%B'),
                          title=f'Percentage Question Distribution for {selected_language} in {future_year}')
            st.plotly_chart(fig3)

            # Plot 4: line plot of the same monthly values
            fig4 = px.line(month_forecast_df, x=month_forecast_df.index, y=selected_language,
                           title=f'Monthly Trends for {selected_language}')
            fig4.update_traces(mode='lines+markers')
            fig4.update_layout(xaxis_title='Date', yaxis_title='Number of Questions', plot_bgcolor='rgba(0, 0, 0, 0)')
            st.plotly_chart(fig4)

        except ValueError as e:
            st.error(f"Error: {e}")
122+
123+
# `menu` holds exactly one page name, so a plain `if` selects this page under
# the same condition as an `elif` link in the page chain would.
if menu == 'Graphical Analysis':

    # Modify title style
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Graphical Analysis</h1>",
        unsafe_allow_html=True
    )
    st.markdown("---", unsafe_allow_html=True)

    # Load data
    df = load_data()

    # Totals per language, largest first; shared by several charts below.
    total_questions_by_language = df.sum().sort_values(ascending=False)

    # 1) Annual Line Chart
    # A YearEnd offset gives the same year-end bins as the 'A' alias without
    # depending on an alias that newer pandas releases deprecate.
    df_annual = df.resample(pd.offsets.YearEnd()).sum()
    fig1 = px.line(df_annual, x=df_annual.index, y=df_annual.columns,
                   title='Timeline of the number of questions per category (2008-2024)')
    st.plotly_chart(fig1)

    # 2) Change in Question Counts Over Time (row-to-row difference)
    df_change = df.diff()
    fig2 = px.line(df_change, x=df_change.index, y=df_change.columns,
                   title='Change in Question Counts for Each Programming Language Over Time')
    st.plotly_chart(fig2)

    # 4) Total Number of Questions by Programming Languages
    fig4 = px.bar(x=total_questions_by_language.index, y=total_questions_by_language.values,
                  title='Total Number of Questions by Programming Languages')
    st.plotly_chart(fig4)

    # 5) Individual Temporal Series for Top 5 Languages
    top_5_languages = total_questions_by_language.head(5).index.tolist()
    df_top_5 = df[top_5_languages]
    fig5 = px.line(df_top_5, x=df_top_5.index, y=df_top_5.columns,
                   title='Individual Temporal Series for Top 5 Languages')
    st.plotly_chart(fig5)

    # 6) Total Number of Questions by Day of the Week
    # Groups rows by the weekday of their index timestamp (Monday = 0).
    # Reindexing to all seven days keeps the values aligned with the labels
    # even when some weekday never occurs in the index.
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    daily_total_questions = (
        df.groupby(df.index.dayofweek).sum().sum(axis=1)
        .reindex(range(len(weekday_names)), fill_value=0)
    )
    fig6 = px.bar(x=weekday_names, y=daily_total_questions.values,
                  title='Total Number of Questions by Day of the Week')
    st.plotly_chart(fig6)

    # 7) Heatmap of the Correlation Between Programming Languages
    correlation_matrix = df.corr()
    fig7 = px.imshow(correlation_matrix, color_continuous_scale='thermal',
                     title='Correlation Heatmap of Programming Languages')
    st.plotly_chart(fig7)

    # 8) Distribution of Questions for Top 10 Languages
    top_10_languages = total_questions_by_language.head(10).index.tolist()
    df_top_10 = df[top_10_languages]
    fig8 = px.box(df_top_10, y=df_top_10.columns, title='Distribution of Questions for Top 10 Programming Languages')
    st.plotly_chart(fig8)

    # 9) Network plot of the top 10 languages, built from their correlation
    # matrix: node i stands for top_10_languages[i].
    corr_matrix = df_top_10.corr()
    G = nx.from_numpy_array(corr_matrix.values)
    node_labels = dict(enumerate(top_10_languages))

    # The style context applies the dark theme to this figure only, instead
    # of switching the process-wide matplotlib style for every later rerun.
    with plt.style.context('dark_background'):
        fig_net, ax = plt.subplots(figsize=(12, 8))
        pos = nx.spring_layout(G, seed=42)  # fixed seed: same layout on every rerun

        nx.draw_networkx_nodes(G, pos, ax=ax, node_size=1500, node_color='skyblue', edgecolors='grey')
        nx.draw_networkx_edges(G, pos, ax=ax, edge_color='grey')
        nx.draw_networkx_labels(G, pos, ax=ax, labels=node_labels, font_size=10,
                                font_weight='bold')
        ax.set_title('Network Plot of Top 10 Programming Languages based on Correlation')

        # Render the explicit figure; no plt.show() — there is no interactive
        # window to open inside a Streamlit server process.
        st.pyplot(fig_net)

    # pyplot keeps every figure alive until it is closed; without this each
    # rerun of the script would leave one more figure in memory.
    plt.close(fig_net)
218+
219+
220+
# `menu` holds exactly one page name, so a single plain `if` is enough here;
# the page condition does not need to be tested twice.
if menu == 'Timeline Visualization':
    st.markdown(
        "<h1 style='color: #87CEEB; font-size: 36px;'>Timeline Visualization</h1>",
        unsafe_allow_html=True
    )
    st.markdown("---", unsafe_allow_html=True)

    # JavaScript to attempt autoplay
    # NOTE(review): nothing on this page creates an element with the id
    # 'autoplay-video', and scripts injected through st.markdown are
    # presumably not executed by the browser — confirm whether this snippet
    # has any effect before relying on it.
    autoplay_js = """
    <script>
    document.addEventListener('DOMContentLoaded', function(event) {
    var video = document.getElementById('autoplay-video');
    video.play();
    });
    </script>
    """
    st.markdown(autoplay_js, unsafe_allow_html=True)

    # Display the MP4 video from the start
    video_path = 'stack_overflow.mp4'  # Replace with your actual video file path
    try:
        # The context manager closes the file handle once the bytes are read.
        with open(video_path, 'rb') as video_file:
            video_bytes = video_file.read()
    except FileNotFoundError:
        # Show a readable message on the page instead of a raw traceback.
        st.error(f"Video file not found: {video_path}")
    else:
        st.video(video_bytes, start_time=0)
Binary file not shown.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
## Setup
3+
4+
1. **Clone the Repository**:
5+
```sh
6+
git clone url_to_this_repository
7+
```
8+
9+
2. **Install Dependencies**:
10+
```sh
11+
pip install -r requirements.txt
12+
```
13+
14+
3. **Run the Model**:
15+
```sh
16+
streamlit run app.py
17+
```
18+
19+
4. **Demo**:
20+
[Demo Video](demovideo.mp4)
21+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pandas
2+
numpy
3+
scikit-learn
4+
seaborn
5+
plotly
6+
matplotlib
7+
streamlit
8+
networkx
9+
statsmodels
10+
Binary file not shown.

0 commit comments

Comments
 (0)