Skip to content

Commit 68f6c9d

Browse files
authored
Merge pull request #217 from righettod/master
Add stats tab.
2 parents 21516dc + 579ff83 commit 68f6c9d

File tree

7 files changed

+483
-0
lines changed

7 files changed

+483
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Regenerate the "statistics" tab (markdown file + pie chart images) and commit the result.
name: update_tab_stats_related_files
on:
  workflow_dispatch:
  push:
  schedule:
    # At 00:00 on the 3rd day of every month
    - cron: '0 0 3 * *'
jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      # Required by the final "git push" step
      contents: write
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
          # "-y" prevents apt from aborting on a confirmation prompt in this non-interactive shell
          sudo apt install -y wget
      - name: Run update of the tab related files
        run: |
          cd ci; bash tab_stats_manage_generation.sh
      - name: Set up Git user
        # NOTE(review): the e-mail value below is reproduced as it appears in this view - confirm it against the repository
        run: git config --global user.email "[email protected]"; git config --global user.name "GHActionBot"
      - name: Commit update
        run: |
          git add --all
          # The default shell runs with "-e": committing with nothing staged would fail the whole job
          if git diff --cached --quiet; then
            echo "No change to commit"
          else
            git commit -am "Sync tab stats related files"
            git push
          fi
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
NEVER DELETE ME!!!!

ci/tab_stats_generate_md_file.py

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
#!/usr/bin/python3
2+
"""
3+
Script using the gathered data from the OSHP project "oshp-stats" to generate/update the
4+
markdown file "tab_stats.md" with mermaid pie charts with different statistics about HTTP security headers usage.
5+
6+
Source:
7+
https://mermaid-js.github.io/mermaid/#/pie
8+
https://github.com/oshp/oshp-stats/
9+
"""
10+
import sqlite3
11+
import re
12+
import requests
13+
import hashlib
14+
from collections import Counter
15+
from datetime import datetime
16+
from pathlib import Path
17+
18+
# Constants
# Timeout passed to "requests.get" when fetching the OSHP headers list
HTTP_REQUEST_TIMEOUT = 60
# SQLite database opened by "execute_query_against_data_db"
DATA_DB_FILE = "/tmp/data.db"
# JSON document whose "headers" entries provide the header names to report on
# NOTE: the lowercase "l" in the constant name is kept as-is because "load_oshp_headers" references it
OSHP_SECURITY_HEADERS_FILE_lOCATION = "https://owasp.org/www-project-secure-headers/ci/headers_add.json"
# Markdown file (re)generated by this script - relative path
MD_FILE = "../tab_stats.md"
# Folder receiving the ".mmd" files written by this script - relative path
IMAGE_FOLDER_LOCATION = "../assets/tab_stats_generated_images"
# Front matter and heading written at the top of MD_FILE by "init_stats_file"
TAB_MD_TEMPLATE = """---
title: statistics
displaytext: Statistics
layout: null
tab: true
order: 10
tags: headers
---

<!-- All the content of this file is generated by the script "ci/tab_stats_generate_md_file.py" -->

<!-- DO NOT EDIT IT MANUALLY -->

# Statistic about HTTP security response headers usage

<!-- markdown-link-check-disable -->

"""
# Section with an image: "%" placeholders are, in order, title, description, image alt text and image file name.
# The image folder is embedded without its leading "../".
SECTION_TEMPLATE = f"""
## %s

%s

![%s]({IMAGE_FOLDER_LOCATION.replace('../', '')}/%s)
"""
# Section without image: "%" placeholders are, in order, title and description
SECTION_TEMPLATE_NO_MERMAID_CODE = """
## %s

%s
"""
54+
55+
# Utility functions
56+
57+
58+
def prepare_generation_of_image_from_mermaid(mermaid_code, filename):
    """Store the mermaid code into the file "<filename>.mmd" of the images folder.

    The file is created (or overwritten) under IMAGE_FOLDER_LOCATION, encoded in UTF-8,
    and a trailing newline is appended to the mermaid code.
    """
    mmd_file_location = f"{IMAGE_FOLDER_LOCATION}/{filename}.mmd"
    with open(mmd_file_location, "w", encoding="utf-8") as mmd_file:
        mmd_file.write(mermaid_code + "\n")
61+
62+
63+
def load_oshp_headers():
    """Download the OSHP headers JSON file and return the header names sorted alphabetically.

    Raises an Exception when the HTTP response status code is not 200.
    """
    response = requests.get(OSHP_SECURITY_HEADERS_FILE_lOCATION, timeout=HTTP_REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise Exception(f"Status code {response.status_code} received!")
    # Every entry of the "headers" array carries the header name under the "name" key
    return sorted(entry["name"] for entry in response.json()["headers"])
72+
73+
74+
def execute_query_against_data_db(sql_query, parameters=()):
    """Run a SQL query against the SQLite database DATA_DB_FILE and return all the rows.

    :param sql_query: SQL statement to execute, "?" placeholders are supported.
    :param parameters: Optional sequence of values bound to the placeholders of the query.
    :return: List of tuples as returned by "cursor.fetchall()".
    """
    # Local import so this function does not depend on a change of the file-level imports
    from contextlib import closing

    # "with connection" only commits/rolls back the transaction, it never closes the connection:
    # "closing" guarantees the connection is released once the rows were fetched
    with closing(sqlite3.connect(DATA_DB_FILE)) as connection:
        with connection:
            cursor = connection.cursor()
            cursor.execute(sql_query, parameters)
            return cursor.fetchall()
80+
81+
82+
def add_stats_section(title, description, chart_mermaid_code):
    """Append a section to the markdown file MD_FILE.

    When mermaid code is provided (not None and not blank), the code is stored in a ".mmd" file
    named after the SHA-1 of the title and the section references the matching ".png" image.
    Otherwise, the section only contains the title and the description.
    """
    with open(MD_FILE, mode="a", encoding="utf-8") as md_file:
        has_chart = chart_mermaid_code is not None and chart_mermaid_code.strip() != ""
        if not has_chart:
            section = SECTION_TEMPLATE_NO_MERMAID_CODE % (title, description)
        else:
            # The hash of the title gives a stable and filesystem-safe file name
            image_basename = hashlib.sha1(title.encode("utf8")).hexdigest()
            prepare_generation_of_image_from_mermaid(chart_mermaid_code, image_basename)
            section = SECTION_TEMPLATE % (title, description, image_basename, f"{image_basename}.png")
        md_file.write(f"{section}\n")
91+
92+
93+
def init_stats_file():
    """(Re)create the markdown file MD_FILE with the tab header and the "last update" line.

    Any existing content of the file is discarded.
    """
    with open(MD_FILE, mode="w", encoding="utf-8") as md_file:
        generated_on = datetime.now().strftime("%m/%d/%Y at %H:%M:%S")
        md_file.write(TAB_MD_TEMPLATE)
        md_file.write("\n\n")
        summary = f"⏲️ Last update: {generated_on} - Domains analyzed count: {get_domains_count()}.\n"
        md_file.write(summary)
99+
100+
101+
def get_domains_count():
    """Return the number of distinct domains present in the "stats" table."""
    distinct_domains = execute_query_against_data_db("select distinct domain from stats")
    return len(distinct_domains)
103+
104+
105+
def get_pie_chart_code(title, dataset_tuples):
    """Build the mermaid code of a pie chart.

    :param title: Accepted for the callers but not emitted in the generated code.
    :param dataset_tuples: Sequence of (label, value) items, one per slice of the pie.
    :return: The "pie" keyword followed by one line per slice, values rounded to 2 decimals.
    """
    # Note: Mermaid use integer value when rendering
    slices = [f'\t"{entry[0]}" : {round(entry[1], 2)}\n' for entry in dataset_tuples]
    return "pie\n" + "".join(slices)
112+
113+
114+
def csp_contain_unsafe_expression(csp_policy):
    """Tell if a CSP policy uses unsafe expressions in a script or style related directive.

    Directives "default-src", "script-src", "script-src-elem" and "script-src-attr" are flagged when
    they use "unsafe-inline", "unsafe-hashes" or "unsafe-eval". Directives starting with "style-src"
    are flagged when they use "unsafe-inline" or "unsafe-hashes".
    Based on "https://report-uri.com/home/generate" generator allowed instructions for CSP directives.

    :param csp_policy: Value of the CSP header.
    :return: True if at least one directive uses an unsafe expression, False otherwise.
    """
    all_unsafe_keywords = {"unsafe-inline", "unsafe-hashes", "unsafe-eval"}
    style_unsafe_keywords = {"unsafe-inline", "unsafe-hashes"}
    directives_allowing_all_unsafe_keywords = {"default-src", "script-src", "script-src-elem", "script-src-attr"}
    for directive in csp_policy.split(";"):
        tokens = directive.split()
        if not tokens:
            continue
        # The directive name is the first token only: a directive name appearing inside
        # a source URL of another directive must not be taken into account
        directive_name = tokens[0].lower()
        if directive_name in directives_allowing_all_unsafe_keywords:
            unsafe_keywords = all_unsafe_keywords
        elif directive_name.startswith("style-src"):
            unsafe_keywords = style_unsafe_keywords
        else:
            continue
        # Compare whole keywords (quotes removed) so "'wasm-unsafe-eval'" is not reported as "unsafe-eval"
        keywords = {token.strip("'\"").lower() for token in tokens[1:]}
        if keywords & unsafe_keywords:
            return True
    return False
130+
131+
132+
# Functions in charge of generating the stats sections
133+
134+
135+
def compute_header_global_usage(header_name):
    """Add the section giving the percentage of domains using the header specified.

    :param header_name: Name of the HTTP response header, in any letter case.
    """
    title = f"Global usage of header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed."
    # The column is compared in lowercase so the name must be lowercased too, otherwise
    # a name like "X-Frame-Options" never matches. Single quotes are doubled because
    # the name is embedded into the SQL string.
    sql_header_name = header_name.lower().replace("'", "''")
    # Prevent the case in which a domain specify X times the same headers...
    query = f"select distinct domain from stats where lower(http_header_name) = '{sql_header_name}'"
    count_of_domains_using_the_header = len(execute_query_against_data_db(query))
    domains_count = get_domains_count()
    percentage_of_domains_using_the_header = (count_of_domains_using_the_header * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains_using_the_header),
                      ("Not using it", (100 - percentage_of_domains_using_the_header))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
149+
150+
151+
def compute_insecure_framing_configuration_global_usage():
    """Add the section giving the percentage of domains with an insecure "x-frame-options" value.

    A value is considered insecure when it is different from "deny" and "sameorigin" (case insensitive).
    """
    header_name = "x-frame-options"
    title = f"Global usage of insecure framing configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure framing configuration: value different from `DENY` or `SAMEORIGIN` including unsupported values."
    # Count domains and not rows: a domain sending the header several times must be counted
    # once because the percentage is computed against the number of distinct domains
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not in ('deny','sameorigin')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
163+
164+
165+
def compute_hsts_preload_global_usage():
    """Add the section giving the percentage of domains using the HSTS "preload" feature."""
    header_name = "strict-transport-security"
    title = "Global usage of the Strict Transport Security 'preload' feature"
    description = f"Provide the distribution of usage of the '[preload](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security#preloading_strict_transport_security)' feature for the header '{header_name}' across all domains analyzed."
    # The count feeds the "Using it" slice so it must select the values containing "preload".
    # Domains are counted (not rows) because the percentage is computed against the number of distinct domains.
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) like '%preload%'"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains),
                      ("Not using it", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
177+
178+
179+
def compute_secure_headers_global_usage():
    """Add the section giving the percentage of domains using / not using secure headers.

    The "Not using them" slice is computed from the rows of the "stats" table having no header name.
    """
    title = "Global usage of secure headers"
    description = "Provide the distribution of usage of secure headers across all domains analyzed."
    rows = execute_query_against_data_db("select count(domain) from stats where http_header_name is NULL")
    domains_without_headers = rows[0][0]
    total_domains = get_domains_count()
    not_using_percentage = (domains_without_headers * 100) / total_domains
    slices = [
        ("Not using them", not_using_percentage),
        ("Using them", (100 - not_using_percentage)),
    ]
    add_stats_section(title, description, get_pie_chart_code(title, slices))
190+
191+
192+
def compute_insecure_referrer_configuration_global_usage():
    """Add the section giving the percentage of domains with an insecure "referrer-policy" value.

    A value is considered insecure when it is "unsafe-url" or "no-referrer-when-downgrade" (case insensitive).
    """
    header_name = "referrer-policy"
    title = f"Global usage of insecure referrer configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure referrer configuration: value set to `unsafe-url` or `no-referrer-when-downgrade`.\n\n`no-referrer-when-downgrade` was included because it send origin, path, and querystring when the protocol security level stays the same (HTTPS is very often in place)."
    # Count domains and not rows: a domain sending the header several times must be counted
    # once because the percentage is computed against the number of distinct domains
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) in ('unsafe-url','no-referrer-when-downgrade')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
204+
205+
206+
def compute_hsts_average_maxage_global_usage():
    """Add the section giving the most common, maximum and minimum HSTS "max-age" values.

    When no "max-age" value can be extracted, the section states it instead of failing.
    """
    title = "Global common 'max-age' values of the Strict Transport Security header"
    query = "select lower(http_header_value) from stats where lower(http_header_name) = 'strict-transport-security' and lower(http_header_value) like '%max-age=%'"
    expr = r'max-age\s*=\s*(\-?"?\d+"?)'
    # Gather values for max-age attribute
    values = []
    for header_value in execute_query_against_data_db(query):
        # Double quotes are removed first so the captured group is always a plain integer
        v = header_value[0].strip('\n\r\t').replace('"', '')
        match = re.search(expr, v)
        if match:
            values.append(int(match.group(1)))
    if not values:
        # "max()" and "min()" raise a ValueError on an empty list
        add_stats_section(title, "* No 'max-age' value was found across all domains analyzed.", None)
        return
    # Find the most popular one (on a tie, the first value encountered is kept)
    maxage_most_popular_value = Counter(values).most_common(1)[0][0]
    maxage_max_value = max(values)
    maxage_min_value = min(values)
    description = f"* Most common value used is {maxage_most_popular_value} seconds ({round(maxage_most_popular_value/60)} minutes) across all domains analyzed."
    description += f"\n* Maximum value used is {maxage_max_value} seconds ({round(maxage_max_value/60)} minutes) across all domains analyzed."
    description += f"\n* Minimum value used is {maxage_min_value} seconds ({round(maxage_min_value/60)} minutes) across all domains analyzed."
    add_stats_section(title, description, None)
230+
231+
232+
def compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage():
    """Add the section giving the percentage of domains having a CSP allowing unsafe expressions.

    Every header whose name starts with "content-security-policy" is taken into account.
    """
    header_name = "content-security-policy"
    title = "Global usage of content security policy with directives allowing unsafe expressions"
    description = "Provide the distribution of content security policy allowing unsafe expressions across all domains analyzed.\n\nDetermine if a CSP policy contains `(default-src|script-src|script-src-elem|script-src-attr|style-src)` directives using `(unsafe-inline|unsafe-hashes|unsafe-eval)` expressions.\n\nBased on [Report-URI CSP](https://report-uri.com/home/generate) generator allowed instructions for CSP directives."
    query = f"select domain, lower(http_header_value) from stats where lower(http_header_name) like '{header_name}%' and lower(http_header_value) like '%unsafe%'"
    # A set is used so a domain sending several matching headers is counted once,
    # the percentage being computed against the number of distinct domains
    domains_using_unsafe = {
        domain
        for domain, header_value in execute_query_against_data_db(query)
        if csp_contain_unsafe_expression(header_value)
    }
    count_of_domains = len(domains_using_unsafe)
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using unsafe", percentage_of_domains),
                      ("Not using unsafe", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
248+
249+
250+
if __name__ == "__main__":
    # Remove the images and mermaid files left by the previous generation
    images_folder = Path(IMAGE_FOLDER_LOCATION)
    for pattern in ("*.png", "*.mmd"):
        for stale_file in images_folder.glob(pattern):
            stale_file.unlink()
    oshp_headers = load_oshp_headers()
    # The file must be initialized before any section is appended to it
    init_stats_file()
    compute_secure_headers_global_usage()
    for header_name in oshp_headers:
        compute_header_global_usage(header_name)
    # Sections dedicated to specific headers, added in this exact order
    specific_sections = (
        compute_insecure_framing_configuration_global_usage,
        compute_insecure_referrer_configuration_global_usage,
        compute_hsts_preload_global_usage,
        compute_hsts_average_maxage_global_usage,
        compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage,
    )
    for compute_section in specific_sections:
        compute_section()

ci/tab_stats_generate_png_files.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
#########################################################################
# Generate the PNG image files from corresponding MMD (mermaid) files.
#
# Dependencies:
# https://github.com/mermaid-js/mermaid-cli
#########################################################################
# Constants
IMAGE_FOLDER_LOCATION="../assets/tab_stats_generated_images"
# Generate images
# Stop if the folder cannot be entered: otherwise the "rm" below would delete MMD files of the current folder
cd "$IMAGE_FOLDER_LOCATION" || exit 1
# Without "nullglob", a folder without MMD file makes the loop run once with the literal name "*.mmd"
shopt -s nullglob
for mmd_file in *.mmd
do
    png_file="${mmd_file%%.*}.png"
    npx -p @mermaid-js/mermaid-cli mmdc --quiet --input "$mmd_file" --output "$png_file" --outputFormat png --theme default --backgroundColor transparent
done
# Keep only the PNG files
rm -f -- *.mmd
cd -

ci/tab_stats_manage_generation.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
#########################################################################
# This script manages the generation/update of the tab represented by the
# file "tab_stats.md".
#########################################################################
# Fail fast: without it, a failed download or generation still ends with exit code 0
# and a partially generated "tab_stats.md" would be committed by the caller
set -euo pipefail
DATA_DB_FILE_LOCATION="https://github.com/oshp/oshp-stats/raw/refs/heads/main/data/data.db"
DATA_DB_FILE="/tmp/data.db"
# Remove the downloaded database whatever the outcome of the script
trap 'echo "[+] Cleanup"; rm -f "$DATA_DB_FILE"' EXIT
echo "[+] Download the database of headers analysis..."
wget -q -O "$DATA_DB_FILE" "$DATA_DB_FILE_LOCATION"
file "$DATA_DB_FILE"
chmod +x tab_stats_generate_*
echo "[+] Generate the MD file of the TAB and all the MMD files for every pie chart image..."
python tab_stats_generate_md_file.py
echo "[+] Generate the PNG image corresponding to each MMD file..."
bash tab_stats_generate_png_files.sh

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
requests

0 commit comments

Comments
 (0)