Skip to content

Commit 03b9e05

Browse files
author
drighetto
committed
Work on #210
1 parent 21516dc commit 03b9e05

24 files changed

+479
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Workflow regenerating the "tab_stats.md" file and its images, then committing the result.
name: update_tab_stats_related_files
on:
  # Manual trigger
  workflow_dispatch:
  # Trigger on every push
  push:
  # Cron fields: minute 0, hour 0, day-of-month 3, every month, any day of week
  schedule:
    - cron: '0 0 3 * *'
jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      # Write access is requested because the last step pushes a commit
      contents: write
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
          sudo apt install wget
      # The generation scripts use paths relative to the "ci" folder, hence the "cd"
      - name: Run update of the tab related files
        run: |
          cd ci; bash tab_stats_manage_generation.sh
      # NOTE(review): the e-mail value below looks like a placeholder left by e-mail obfuscation - confirm the real address
      - name: Set up Git user
        run: git config --global user.email "[email protected]"; git config --global user.name "GHActionBot"
      - name: Commit update
        run: git add --all; git commit -am "Sync tab stats related files"; git push
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
NEVER DELETE ME!!!!
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading

ci/tab_stats_generate_md_file.py

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
#!/usr/bin/python3
2+
"""
3+
Script using the gathered data from the OSHP project "oshp-stats" to generate/update the
4+
markdown file "tab_stats.md" with mermaid pie charts showing different statistics about HTTP security headers usage.
5+
6+
Source:
7+
https://mermaid-js.github.io/mermaid/#/pie
8+
https://github.com/oshp/oshp-stats/
9+
"""
10+
import sqlite3
11+
import re
12+
import requests
13+
import hashlib
14+
from collections import Counter
15+
from datetime import datetime
16+
from pathlib import Path
17+
18+
# Constants
# Timeout passed to the HTTP request retrieving the OSHP headers file
HTTP_REQUEST_TIMEOUT = 60
# Local path of the SQLite database queried to compute the statistics
DATA_DB_FILE = "/tmp/data.db"
# URL of the JSON file from which the header names are loaded
# NOTE(review): the constant name contains a lowercase "l" ("lOCATION"); it is referenced with this exact spelling
OSHP_SECURITY_HEADERS_FILE_lOCATION = "https://owasp.org/www-project-secure-headers/ci/headers_add.json"
# Markdown file generated by this script (path relative to the folder of this script)
MD_FILE = "../tab_stats.md"
# Folder receiving the ".mmd" files (path relative to the folder of this script)
IMAGE_FOLDER_LOCATION = "../assets/tab_stats_generated_images"
# Header (front matter + heading) written at the beginning of the markdown file
TAB_MD_TEMPLATE = """---
title: statistics
displaytext: Statistics
layout: null
tab: true
order: 10
tags: headers
---

<!-- All the content of this file is generated by the script "ci/tab_stats_generate_md_file.py" -->

<!-- DO NOT EDIT IT MANUALLY -->

# Statistic about HTTP security response headers usage

"""
# Section with an image, placeholders are: title, description, image alternate text, image file name
# The image folder is referenced without its leading "../"
SECTION_TEMPLATE = f"""
## %s

%s

![%s]({IMAGE_FOLDER_LOCATION.replace('../', '')}/%s)
"""
# Section without image, placeholders are: title, description
SECTION_TEMPLATE_NO_MERMAID_CODE = """
## %s

%s
"""
52+
53+
# Utility functions
54+
55+
56+
def prepare_generation_of_image_from_mermaid(mermaid_code, filename):
    """Store mermaid source code as "<filename>.mmd" in the image folder.

    The file is written UTF-8 encoded and a trailing newline is appended.

    Args:
        mermaid_code: Mermaid chart definition to store.
        filename: Base name (without extension) of the ".mmd" file.
    """
    mmd_file = Path(f"{IMAGE_FOLDER_LOCATION}/{filename}.mmd")
    mmd_file.write_text(mermaid_code + "\n", encoding="utf-8")
59+
60+
61+
def load_oshp_headers():
    """Download the OSHP headers file and return the header names it lists.

    Returns:
        The "name" value of every entry of the "headers" collection, sorted.

    Raises:
        Exception: If the HTTP response status code is not 200.
    """
    response = requests.get(OSHP_SECURITY_HEADERS_FILE_lOCATION, timeout=HTTP_REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise Exception(f"Status code {response.status_code} received!")
    return sorted(entry["name"] for entry in response.json()["headers"])
70+
71+
72+
def execute_query_against_data_db(sql_query, parameters=()):
    """Run a SQL query against the statistics database and return all rows.

    Args:
        sql_query: SQL statement to execute; it may contain "?" placeholders.
        parameters: Optional values bound to the placeholders of the query.
            Prefer it over building the query via string formatting.

    Returns:
        The list of rows (tuples) returned by the query.
    """
    connection = sqlite3.connect(DATA_DB_FILE)
    try:
        # Used as a context manager, a sqlite3 connection only commits or
        # rolls back the transaction: it does NOT close the connection,
        # hence the explicit close() in the "finally" clause.
        with connection:
            curs = connection.cursor()
            curs.execute(sql_query, parameters)
            return curs.fetchall()
    finally:
        connection.close()
78+
79+
80+
def add_stats_section(title, description, chart_mermaid_code):
    """Append a statistics section to the markdown file.

    When mermaid code is provided (not None and not blank), it is stored as a
    ".mmd" file named after the SHA1 of the title and the section references
    the ".png" image having the same base name.

    Args:
        title: Title of the section.
        description: Markdown text placed under the title.
        chart_mermaid_code: Mermaid code of the chart or None.
    """
    with open(MD_FILE, mode="a", encoding="utf-8") as md_file:
        has_chart = chart_mermaid_code is not None and chart_mermaid_code.strip() != ""
        if not has_chart:
            section = SECTION_TEMPLATE_NO_MERMAID_CODE % (title, description)
        else:
            image_name = hashlib.sha1(title.encode("utf8")).hexdigest()
            prepare_generation_of_image_from_mermaid(chart_mermaid_code, image_name)
            section = SECTION_TEMPLATE % (title, description, image_name, f"{image_name}.png")
        md_file.write(f"{section}\n")
89+
90+
91+
def init_stats_file():
    """Create (or overwrite) the markdown file with its header.

    The header is made of the tab template followed by a line giving the
    generation date and the number of domains analyzed.
    """
    # Gather the dynamic values BEFORE opening the file in "w" mode: if the
    # database query fails, the existing file is then left untouched instead
    # of being truncated.
    domains_count = get_domains_count()
    cdate = datetime.now().strftime("%m/%d/%Y at %H:%M:%S")
    with open(MD_FILE, mode="w", encoding="utf-8") as f:
        f.write(TAB_MD_TEMPLATE)
        f.write("\n\n")
        f.write(f"⏲️ Last update: {cdate} - Domains analyzed count: {domains_count}.\n")
97+
98+
99+
def get_domains_count():
    """Return the number of distinct "domain" values of the "stats" table."""
    distinct_domains = execute_query_against_data_db("select distinct domain from stats")
    return len(distinct_domains)
101+
102+
103+
def get_pie_chart_code(title, dataset_tuples):
    """Build the mermaid source code of a pie chart.

    Args:
        title: Not used: the chart is emitted without a title line. The
            parameter is kept so that existing callers keep working.
        dataset_tuples: Iterable of (label, value) entries, one per slice.

    Returns:
        The mermaid code: a "pie" line followed by one tab-indented line per
        slice, the value being rounded to 2 decimals.
    """
    slices = [f"\t\"{entry[0]}\" : {round(entry[1], 2)}\n" for entry in dataset_tuples]
    return "pie\n" + "".join(slices)
110+
111+
112+
def csp_contain_unsafe_expression(csp_policy):
    """Tell if a CSP policy uses unsafe expressions in script/style directives.

    A policy is flagged when:
    - A (default-src|script-src|script-src-elem|script-src-attr) directive
      uses (unsafe-inline|unsafe-hashes|unsafe-eval).
    - A directive whose name starts with "style-src" uses
      (unsafe-inline|unsafe-hashes).

    Based on "https://report-uri.com/home/generate" generator allowed
    instructions for CSP directives.

    Args:
        csp_policy: Value of the CSP header. The matching is case sensitive.

    Returns:
        True if at least one directive matches, False otherwise.
    """
    exp_all_unsafe_expressions = r'(unsafe-inline|unsafe-hashes|unsafe-eval)'
    exp_style_unsafe_expressions = r'(unsafe-inline|unsafe-hashes)'
    directive_names_allowing_all_unsafe_expressions = ("default-src", "script-src", "script-src-elem", "script-src-attr")
    # ";" separates the directives of a policy and "," separates the policies
    # when several CSP headers were merged into a single value
    for directive in re.split(r"[;,]", csp_policy):
        tokens = directive.split()
        if not tokens:
            continue
        # The directive name is the first token: looking for the name anywhere
        # in the directive would flag a name only present in a source URL
        directive_name = tokens[0]
        if directive_name in directive_names_allowing_all_unsafe_expressions:
            unsafe_expressions = exp_all_unsafe_expressions
        elif directive_name.startswith("style-src"):
            unsafe_expressions = exp_style_unsafe_expressions
        else:
            continue
        if re.search(unsafe_expressions, directive):
            return True
    return False
128+
129+
130+
# Functions in charge of generate stats sections
131+
132+
133+
def compute_header_global_usage(header_name):
    """Add the section giving the share of domains using a given header.

    Args:
        header_name: Name of the HTTP header, in any letter case.
    """
    title = f"Global usage of header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed."
    # The stored name is lowercased by the query so the searched name must be
    # lowercased too otherwise a mixed case name never matches. Single quotes
    # are doubled because the name is inserted into a SQL string literal.
    sql_header_name = header_name.lower().replace("'", "''")
    # Prevent the case in which a domain specify X times the same headers...
    query = f"select distinct domain from stats where lower(http_header_name) = '{sql_header_name}'"
    count_of_domains_using_the_header = len(execute_query_against_data_db(query))
    domains_count = get_domains_count()
    percentage_of_domains_using_the_header = (count_of_domains_using_the_header * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains_using_the_header),
                      ("Not using it", (100 - percentage_of_domains_using_the_header))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
147+
148+
149+
def compute_insecure_framing_configuration_global_usage():
    """Add the section giving the share of domains with an insecure framing configuration.

    A configuration is considered insecure when the "x-frame-options" header
    has a value different from "deny" and "sameorigin" (case insensitive).
    """
    header_name = "x-frame-options"
    title = f"Global usage of insecure framing configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure framing configuration: value different from `DENY` or `SAMEORIGIN` including unsupported values."
    # Count the domains and not the rows: a domain specifying X times the
    # header must be counted once, otherwise the percentage can exceed 100
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not in ('deny','sameorigin')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
161+
162+
163+
def compute_hsts_preload_global_usage():
    """Add the section giving the share of domains using the HSTS "preload" feature.

    A domain uses the feature when the value of one of its
    "strict-transport-security" headers contains "preload" (case insensitive).
    """
    header_name = "strict-transport-security"
    title = "Global usage of the Strict Transport Security 'preload' feature"
    description = f"Provide the distribution of usage of the '[preload](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security#preloading_strict_transport_security)' feature for the header '{header_name}' across all domains analyzed."
    # The result feeds the "Using it" slice so the value must CONTAIN
    # "preload": a "not like" condition would chart the opposite population.
    # Domains are counted, not rows, so a repeated header is counted once.
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) like '%preload%'"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains),
                      ("Not using it", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
175+
176+
177+
def compute_secure_headers_global_usage():
    """Add the section giving the share of domains using secure headers.

    The domains counted as "Not using them" are the ones having a row without
    header name (http_header_name is NULL) in the "stats" table.
    """
    title = "Global usage of secure headers"
    description = "Provide the distribution of usage of secure headers across all domains analyzed."
    # Count each domain once, consistently with get_domains_count()
    query = "select count(distinct domain) from stats where http_header_name is NULL"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Not using them", percentage_of_domains),
                      ("Using them", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
188+
189+
190+
def compute_insecure_referrer_configuration_global_usage():
    """Add the section giving the share of domains with an insecure referrer configuration.

    A configuration is considered insecure when the "referrer-policy" header
    is set to "unsafe-url" or "no-referrer-when-downgrade" (case insensitive).
    """
    header_name = "referrer-policy"
    title = f"Global usage of insecure referrer configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure referrer configuration: value set to `unsafe-url` or `no-referrer-when-downgrade`.\n\n`no-referrer-when-downgrade` was included because it send origin, path, and querystring when the protocol security level stays the same (HTTPS is very often in place)."
    # Count the domains and not the rows: a domain specifying X times the
    # header must be counted once, otherwise the percentage can exceed 100
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) in ('unsafe-url','no-referrer-when-downgrade')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
202+
203+
204+
def compute_hsts_average_maxage_global_usage():
    """Add the section describing the "max-age" values used by HSTS headers.

    The section gives the most common, the maximum and the minimum "max-age"
    value found. It has no chart. When no value can be extracted, the section
    only states it instead of failing on an empty collection.
    """
    title = "Global common 'max-age' values of the Strict Transport Security header"
    query = "select lower(http_header_value) from stats where lower(http_header_name) = 'strict-transport-security' and lower(http_header_value) like '%max-age=%'"
    header_values = execute_query_against_data_db(query)
    expr = re.compile(r'max-age\s*=\s*(\-?"?\d+"?)')
    # Gather values for max-age attribute
    values = []
    for header_value in header_values:
        # Double quotes are removed so the captured group is a plain integer
        v = header_value[0].strip('\n\r\t').replace('"', '')
        match = expr.search(v)
        if match:
            values.append(int(match.group(1)))
    if not values:
        # max() and min() raise a ValueError on an empty collection
        add_stats_section(title, "* No 'max-age' value found across all domains analyzed.", None)
        return
    # Find the most popular one: on equal counts, the first value met wins
    maxage_most_popular_value = Counter(values).most_common(1)[0][0]
    maxage_max_value = max(values)
    maxage_min_value = min(values)
    description = f"* Most common value used is {maxage_most_popular_value} seconds ({round(maxage_most_popular_value/60)} minutes) across all domains analyzed."
    description += f"\n* Maximum value used is {maxage_max_value} seconds ({round(maxage_max_value/60)} minutes) across all domains analyzed."
    description += f"\n* Minimum value used is {maxage_min_value} seconds ({round(maxage_min_value/60)} minutes) across all domains analyzed."
    add_stats_section(title, description, None)
228+
229+
230+
def compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage():
    """Add the section giving the share of domains whose CSP allows unsafe expressions.

    Every header whose name starts with "content-security-policy" and whose
    value contains "unsafe" is evaluated with csp_contain_unsafe_expression().
    """
    header_name = "content-security-policy"
    title = "Global usage of content security policy with directives allowing unsafe expressions"
    description = "Provide the distribution of content security policy allowing unsafe expressions across all domains analyzed.\n\nDetermine if a CSP policy contains `(default-src|script-src|script-src-elem|script-src-attr|style-src)` directives using `(unsafe-inline|unsafe-hashes|unsafe-eval)` expressions.\n\nBased on [Report-URI CSP](https://report-uri.com/home/generate) generator allowed instructions for CSP directives."
    query = f"select domain, lower(http_header_value) from stats where lower(http_header_name) like '{header_name}%' and lower(http_header_value) like '%unsafe%'"
    rows = execute_query_against_data_db(query)
    # A set is used to count each domain once even if several of its headers
    # match, otherwise the percentage is computed on rows and can exceed 100
    domains_using_unsafe = {domain for domain, policy in rows if csp_contain_unsafe_expression(policy)}
    count_of_domains = len(domains_using_unsafe)
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using unsafe", percentage_of_domains),
                      ("Not using unsafe", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
246+
247+
248+
if __name__ == "__main__":
    # Remove the images and the mermaid files present in the image folder
    for pattern in ("*.png", "*.mmd"):
        for stale_file in Path(IMAGE_FOLDER_LOCATION).glob(pattern):
            stale_file.unlink()
    # The headers list is loaded before the markdown file gets overwritten
    oshp_headers = load_oshp_headers()
    init_stats_file()
    # One section per statistic, in the order of appearance in the tab
    compute_secure_headers_global_usage()
    for oshp_header in oshp_headers:
        compute_header_global_usage(oshp_header)
    compute_insecure_framing_configuration_global_usage()
    compute_insecure_referrer_configuration_global_usage()
    compute_hsts_preload_global_usage()
    compute_hsts_average_maxage_global_usage()
    compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage()

ci/tab_stats_generate_png_files.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
#########################################################################
# Generate the PNG image files from corresponding MMD (mermaid) files.
#
# Dependencies:
# https://github.com/mermaid-js/mermaid-cli
#########################################################################
# Constants
IMAGE_FOLDER_LOCATION="../assets/tab_stats_generated_images"
# Generate images
# Stop if the folder cannot be entered: otherwise the "rm" below would
# delete the MMD files of the current folder
cd "$IMAGE_FOLDER_LOCATION" || exit 1
# Without "nullglob", the loop would run once with the literal "*.mmd"
# when the folder contains no MMD file
shopt -s nullglob
for mmd_file in *.mmd
do
    png_file="${mmd_file%%.*}.png"
    npx -p @mermaid-js/mermaid-cli mmdc --quiet --input "$mmd_file" --output "$png_file" --outputFormat png --theme default --backgroundColor transparent
done
# Only keep the PNG files
rm -f -- *.mmd
cd - || exit 1

ci/tab_stats_manage_generation.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
#########################################################################
# This script manages the generation/update of the tab represented by the
# file "tab_stats.md".
#
# It stops with a non-zero exit code as soon as the download or a
# generation step fails, so that the caller does not continue with a
# partially generated content.
#########################################################################
DATA_DB_FILE_LOCATION="https://github.com/oshp/oshp-stats/raw/refs/heads/main/data/data.db"
DATA_DB_FILE="/tmp/data.db"
# Remove the downloaded database whatever the way the script ends
trap 'rm -f "$DATA_DB_FILE"' EXIT
echo "[+] Download the database of headers analysis..."
wget -q -O "$DATA_DB_FILE" "$DATA_DB_FILE_LOCATION" || { echo "[!] Download of the database failed!" >&2; exit 1; }
# Informational only: display the type of the file downloaded
file "$DATA_DB_FILE"
chmod +x tab_stats_generate_*
echo "[+] Generate the MD file of the TAB and all the MMD files for every pie chart image..."
python tab_stats_generate_md_file.py || exit 1
echo "[+] Generate the PNG image corresponding to each MMD file..."
bash tab_stats_generate_png_files.sh || exit 1
echo "[+] Cleanup"
rm -f "$DATA_DB_FILE"

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
requests

0 commit comments

Comments
 (0)