Skip to content

Commit ac0b8e4

Browse files
Examine the impact of ethnicity on participation rates
1 parent f91845c commit ac0b8e4

File tree

2 files changed

+136
-8
lines changed

2 files changed

+136
-8
lines changed

.ipynb_checkpoints/Stackoverflow_Survey_Analysis-checkpoint.ipynb

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21729,33 +21729,97 @@
2172921729
"plt.show()\n"
2173021730
]
2173121731
},
21732+
{
21733+
"cell_type": "markdown",
21734+
"metadata": {},
21735+
"source": [
21736+
"# Examine how survey participation and average salary vary by ethnicity"
21737+
]
21738+
},
2173221739
{
2173321740
"cell_type": "code",
2173421741
"execution_count": null,
2173521742
"metadata": {},
2173621743
"outputs": [],
21737-
"source": []
21744+
"source": [
21745+
"import pandas as pd\n",
21746+
"\n",
21747+
"# Load the CSV files\n",
21748+
"file1 = pd.read_csv(r\"Data/survey_results_public_2018.csv\")\n",
21749+
"file2 = pd.read_csv(r\"Data/survey_results_public_2019.csv\")\n",
21750+
"file3 = pd.read_csv(r\"Data/survey_results_public_2020.csv\")\n",
21751+
"\n",
21752+
"# Merge the data\n",
21753+
"merged_data = pd.concat([file1, file2, file3], ignore_index=True)\n"
21754+
]
2173821755
},
2173921756
{
2174021757
"cell_type": "code",
2174121758
"execution_count": null,
2174221759
"metadata": {},
2174321760
"outputs": [],
21744-
"source": []
21761+
"source": [
21762+
"def preprocess_data(df):\n",
21763+
" # Convert compensation to numeric, ignoring non-numeric values\n",
21764+
" df['ConvertedComp'] = pd.to_numeric(df['ConvertedComp'], errors='coerce')\n",
21765+
" \n",
21766+
" # Fill missing values in relevant columns\n",
21767+
" df['EdLevel'] = df['EdLevel'].fillna('Unknown')\n",
21768+
" df['YearsCodePro'] = pd.to_numeric(df['YearsCodePro'], errors='coerce').fillna(0)\n",
21769+
" df['Gender'] = df['Gender'].fillna('Unknown')\n",
21770+
" df['Ethnicity'] = df['Ethnicity'].fillna('Unknown')\n",
21771+
" \n",
21772+
" # Drop rows where ConvertedComp is NaN\n",
21773+
" df = df.dropna(subset=['ConvertedComp'])\n",
21774+
" \n",
21775+
" return df\n",
21776+
"\n",
21777+
"# Preprocess the data\n",
21778+
"cleaned_data = preprocess_data(merged_data)\n"
21779+
]
2174521780
},
2174621781
{
2174721782
"cell_type": "code",
2174821783
"execution_count": null,
2174921784
"metadata": {},
2175021785
"outputs": [],
21751-
"source": []
21786+
"source": [
21787+
"# Count the number of respondents by ethnicity\n",
21788+
"ethnicity_counts = cleaned_data['Ethnicity'].value_counts()\n",
21789+
"\n",
21790+
"# Calculate average salary by ethnicity\n",
21791+
"avg_salary_by_ethnicity = cleaned_data.groupby('Ethnicity')['ConvertedComp'].mean()\n",
21792+
"\n",
21793+
"print(\"Number of Respondents by Ethnicity:\")\n",
21794+
"print(ethnicity_counts)\n",
21795+
"print(\"\\nAverage Salary by Ethnicity:\")\n",
21796+
"print(avg_salary_by_ethnicity)\n"
21797+
]
2175221798
},
2175321799
{
2175421800
"cell_type": "code",
2175521801
"execution_count": null,
2175621802
"metadata": {},
2175721803
"outputs": [],
21758-
"source": []
21804+
"source": [
21805+
"import matplotlib.pyplot as plt\n",
21806+
"\n",
21807+
"# Number of Respondents by Ethnicity\n",
21808+
"plt.figure(figsize=(10, 6))\n",
21809+
"ethnicity_counts.plot(kind='bar')\n",
21810+
"plt.title('Number of Respondents by Ethnicity')\n",
21811+
"plt.xlabel('Ethnicity')\n",
21812+
"plt.ylabel('Number of Respondents')\n",
21813+
"plt.show()\n",
21814+
"\n",
21815+
"# Average Salary by Ethnicity\n",
21816+
"plt.figure(figsize=(10, 6))\n",
21817+
"avg_salary_by_ethnicity.plot(kind='bar')\n",
21818+
"plt.title('Average Salary by Ethnicity')\n",
21819+
"plt.xlabel('Ethnicity')\n",
21820+
"plt.ylabel('Average Salary (ConvertedComp)')\n",
21821+
"plt.show()\n"
21822+
]
2175921823
},
2176021824
{
2176121825
"cell_type": "markdown",

Stackoverflow_Survey_Analysis.ipynb

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21729,33 +21729,97 @@
2172921729
"plt.show()\n"
2173021730
]
2173121731
},
21732+
{
21733+
"cell_type": "markdown",
21734+
"metadata": {},
21735+
"source": [
21736+
"# Examine how survey participation and average salary vary by ethnicity"
21737+
]
21738+
},
2173221739
{
2173321740
"cell_type": "code",
2173421741
"execution_count": null,
2173521742
"metadata": {},
2173621743
"outputs": [],
21737-
"source": []
21744+
"source": [
21745+
"import pandas as pd\n",
21746+
"\n",
21747+
"# Load the CSV files\n",
21748+
"file1 = pd.read_csv(r\"Data/survey_results_public_2018.csv\")\n",
21749+
"file2 = pd.read_csv(r\"Data/survey_results_public_2019.csv\")\n",
21750+
"file3 = pd.read_csv(r\"Data/survey_results_public_2020.csv\")\n",
21751+
"\n",
21752+
"# Merge the data\n",
21753+
"merged_data = pd.concat([file1, file2, file3], ignore_index=True)\n"
21754+
]
2173821755
},
2173921756
{
2174021757
"cell_type": "code",
2174121758
"execution_count": null,
2174221759
"metadata": {},
2174321760
"outputs": [],
21744-
"source": []
21761+
"source": [
21762+
"def preprocess_data(df):\n",
21763+
" # Convert compensation to numeric, ignoring non-numeric values\n",
21764+
" df['ConvertedComp'] = pd.to_numeric(df['ConvertedComp'], errors='coerce')\n",
21765+
" \n",
21766+
" # Fill missing values in relevant columns\n",
21767+
" df['EdLevel'] = df['EdLevel'].fillna('Unknown')\n",
21768+
" df['YearsCodePro'] = pd.to_numeric(df['YearsCodePro'], errors='coerce').fillna(0)\n",
21769+
" df['Gender'] = df['Gender'].fillna('Unknown')\n",
21770+
" df['Ethnicity'] = df['Ethnicity'].fillna('Unknown')\n",
21771+
" \n",
21772+
" # Drop rows where ConvertedComp is NaN\n",
21773+
" df = df.dropna(subset=['ConvertedComp'])\n",
21774+
" \n",
21775+
" return df\n",
21776+
"\n",
21777+
"# Preprocess the data\n",
21778+
"cleaned_data = preprocess_data(merged_data)\n"
21779+
]
2174521780
},
2174621781
{
2174721782
"cell_type": "code",
2174821783
"execution_count": null,
2174921784
"metadata": {},
2175021785
"outputs": [],
21751-
"source": []
21786+
"source": [
21787+
"# Count the number of respondents by ethnicity\n",
21788+
"ethnicity_counts = cleaned_data['Ethnicity'].value_counts()\n",
21789+
"\n",
21790+
"# Calculate average salary by ethnicity\n",
21791+
"avg_salary_by_ethnicity = cleaned_data.groupby('Ethnicity')['ConvertedComp'].mean()\n",
21792+
"\n",
21793+
"print(\"Number of Respondents by Ethnicity:\")\n",
21794+
"print(ethnicity_counts)\n",
21795+
"print(\"\\nAverage Salary by Ethnicity:\")\n",
21796+
"print(avg_salary_by_ethnicity)\n"
21797+
]
2175221798
},
2175321799
{
2175421800
"cell_type": "code",
2175521801
"execution_count": null,
2175621802
"metadata": {},
2175721803
"outputs": [],
21758-
"source": []
21804+
"source": [
21805+
"import matplotlib.pyplot as plt\n",
21806+
"\n",
21807+
"# Number of Respondents by Ethnicity\n",
21808+
"plt.figure(figsize=(10, 6))\n",
21809+
"ethnicity_counts.plot(kind='bar')\n",
21810+
"plt.title('Number of Respondents by Ethnicity')\n",
21811+
"plt.xlabel('Ethnicity')\n",
21812+
"plt.ylabel('Number of Respondents')\n",
21813+
"plt.show()\n",
21814+
"\n",
21815+
"# Average Salary by Ethnicity\n",
21816+
"plt.figure(figsize=(10, 6))\n",
21817+
"avg_salary_by_ethnicity.plot(kind='bar')\n",
21818+
"plt.title('Average Salary by Ethnicity')\n",
21819+
"plt.xlabel('Ethnicity')\n",
21820+
"plt.ylabel('Average Salary (ConvertedComp)')\n",
21821+
"plt.show()\n"
21822+
]
2175921823
},
2176021824
{
2176121825
"cell_type": "markdown",

0 commit comments

Comments
 (0)