Skip to content

Add stats tab. #217

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/tab-stats-headers-generate-related-files.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Regenerate the "Statistics" tab (markdown file + chart images) and commit the result.
name: update_tab_stats_related_files
on:
  workflow_dispatch:
  push:
  schedule:
    # At 00:00 on the 3rd day of every month
    - cron: '0 0 3 * *'
jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      # Required by the final "git push" step
      contents: write
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
          # "-y" because the runner is non-interactive: a confirmation prompt would abort the step
          sudo apt-get install -y wget
      - name: Run update of the tab related files
        run: |
          cd ci; bash tab_stats_manage_generation.sh
      - name: Set up Git user
        # NOTE(review): the e-mail below looks like an obfuscation placeholder - confirm the real bot address
        run: git config --global user.email "[email protected]"; git config --global user.name "GHActionBot"
      - name: Commit update
        # "git commit" exits with an error when nothing is staged, so only commit/push real changes
        run: |
          git add --all
          if git diff --cached --quiet; then
            echo "Nothing to commit"
          else
            git commit -am "Sync tab stats related files"
            git push
          fi
1 change: 1 addition & 0 deletions assets/tab_stats_generated_images/.keep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NEVER DELETE ME!!!!
264 changes: 264 additions & 0 deletions ci/tab_stats_generate_md_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
#!/usr/bin/python3
"""
Script using the data gathered by the OSHP project "oshp-stats" to generate/update the
markdown file "tab_stats.md" with mermaid pie charts showing different statistics about the usage of HTTP security headers.

Source:
https://mermaid-js.github.io/mermaid/#/pie
https://github.com/oshp/oshp-stats/
"""
import sqlite3
import re
import requests
import hashlib
from collections import Counter
from datetime import datetime
from pathlib import Path

# Constants
# Timeout (seconds) given to requests.get() when downloading the OSHP headers file
HTTP_REQUEST_TIMEOUT = 60
# SQLite database file opened by execute_query_against_data_db()
DATA_DB_FILE = "/tmp/data.db"
# URL of the JSON document whose "headers" entries provide the header names to chart
OSHP_SECURITY_HEADERS_FILE_lOCATION = "https://owasp.org/www-project-secure-headers/ci/headers_add.json"
# Markdown file (re)generated by this script; relative path
MD_FILE = "../tab_stats.md"
# Folder receiving the ".mmd" chart sources; relative path
IMAGE_FOLDER_LOCATION = "../assets/tab_stats_generated_images"
# Front matter and heading written first in MD_FILE by init_stats_file()
TAB_MD_TEMPLATE = """---
title: statistics
displaytext: Statistics
layout: null
tab: true
order: 10
tags: headers
---

<!-- All the content of this file is generated by the script "ci/tab_stats_generate_md_file.py" -->

<!-- DO NOT EDIT IT MANUALLY -->

# Statistic about HTTP security response headers usage

<!-- markdown-link-check-disable -->

"""
# Section with a chart image. %-placeholders, in order: title, description, image alt text,
# image file name. The image folder is embedded without its leading "../".
SECTION_TEMPLATE = f"""
## %s

%s

![%s]({IMAGE_FOLDER_LOCATION.replace('../', '')}/%s)
"""
# Section without chart. %-placeholders, in order: title, description.
SECTION_TEMPLATE_NO_MERMAID_CODE = """
## %s

%s
"""

# Utility functions


def prepare_generation_of_image_from_mermaid(mermaid_code, filename):
    """Store the mermaid source of a chart as "<filename>.mmd" in the image folder.

    :param mermaid_code: Mermaid code of the chart; a trailing newline is appended.
    :param filename: Name of the file to create, without extension.
    """
    mmd_location = f"{IMAGE_FOLDER_LOCATION}/{filename}.mmd"
    content = mermaid_code + "\n"
    with open(mmd_location, "w", encoding="utf-8") as mmd_file:
        mmd_file.write(content)


def load_oshp_headers():
    """Download the OSHP headers JSON file and return the header names, sorted.

    :return: Sorted list of the "name" value of every entry under the "headers" key.
    :raises Exception: When the HTTP response status code is not 200.
    """
    response = requests.get(OSHP_SECURITY_HEADERS_FILE_lOCATION, timeout=HTTP_REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise Exception(f"Status code {response.status_code} received!")
    return sorted(entry["name"] for entry in response.json()["headers"])


def execute_query_against_data_db(sql_query, parameters=()):
    """Run a SQL query against the data database and return every row.

    :param sql_query: SQL statement to execute.
    :param parameters: Optional values bound to the "?" placeholders of the statement,
                       to be preferred over building the SQL text by hand.
    :return: List of tuples, one per row returned by the query.
    """
    connection = sqlite3.connect(DATA_DB_FILE)
    try:
        # The connection context manager only commits/rolls back the transaction,
        # it does not close the connection: hence the explicit close() below.
        with connection:
            return connection.execute(sql_query, parameters).fetchall()
    finally:
        connection.close()


def add_stats_section(title, description, chart_mermaid_code):
    """Append a section to the markdown file, with a chart image when mermaid code is given.

    When chart code is provided, it is stored in a ".mmd" file named after the SHA-1 of
    the title and the section references the ".png" file having the same base name.

    :param title: Title of the section.
    :param description: Text of the section.
    :param chart_mermaid_code: Mermaid code of the chart, or None/blank for a text-only section.
    """
    with open(MD_FILE, mode="a", encoding="utf-8") as md_file:
        has_chart = chart_mermaid_code is not None and chart_mermaid_code.strip() != ""
        if not has_chart:
            section = SECTION_TEMPLATE_NO_MERMAID_CODE % (title, description)
        else:
            image_stem = hashlib.sha1(title.encode("utf8")).hexdigest()
            prepare_generation_of_image_from_mermaid(chart_mermaid_code, image_stem)
            section = SECTION_TEMPLATE % (title, description, image_stem, image_stem + ".png")
        md_file.write(section + "\n")


def init_stats_file():
    """(Re)create the markdown file with its header and the "last update" line.

    Any previous content of the file is discarded.
    """
    with open(MD_FILE, mode="w", encoding="utf-8") as md_file:
        generated_on = datetime.now().strftime("%m/%d/%Y at %H:%M:%S")
        md_file.write(TAB_MD_TEMPLATE + "\n\n")
        summary = f"⏲️ Last update: {generated_on} - Domains analyzed count: {get_domains_count()}.\n"
        md_file.write(summary)


def get_domains_count():
    """Return the number of distinct values of the "domain" column of the "stats" table."""
    distinct_domains = execute_query_against_data_db("select distinct domain from stats")
    return len(distinct_domains)


def get_pie_chart_code(title, dataset_tuples):
    """Build the mermaid code of a pie chart.

    :param title: Title of the chart; accepted but not emitted in the generated code.
    :param dataset_tuples: Iterable of (label, value) entries, one per slice.
    :return: Mermaid code: a "pie" line followed by one line per slice, values rounded to 2 digits.
    """
    chart_lines = ["pie\n"]
    for entry in dataset_tuples:
        label, value = entry[0], entry[1]
        # Note: Mermaid uses integer values when rendering
        chart_lines.append(f"\t\"{label}\" : {round(value, 2)}\n")
    return "".join(chart_lines)


def csp_contain_unsafe_expression(csp_policy):
    """Tell if a CSP policy has a directive allowing unsafe expressions.

    A directive (policy fragment between ";") is flagged when either:
    - it contains one of default-src/script-src/script-src-elem/script-src-attr and
      one of unsafe-inline/unsafe-hashes/unsafe-eval, or
    - it starts (ignoring surrounding blanks) with style-src and contains
      unsafe-inline or unsafe-hashes.

    Based on "https://report-uri.com/home/generate" generator allowed instructions for CSP directives.

    :param csp_policy: Value of the CSP header.
    :return: True when at least one directive is flagged, False otherwise.
    """
    script_like_directive = r'(default-src|script-src|script-src-elem|script-src-attr)'
    any_unsafe_keyword = r'(unsafe-inline|unsafe-hashes|unsafe-eval)'
    style_unsafe_keyword = r'(unsafe-inline|unsafe-hashes)'
    for directive in csp_policy.split(";"):
        if re.search(script_like_directive, directive) and re.search(any_unsafe_keyword, directive):
            return True
        if directive.strip().startswith("style-src") and re.search(style_unsafe_keyword, directive):
            return True
    return False


# Functions in charge of generating the stats sections


def compute_header_global_usage(header_name):
    """Add the section showing the share of analyzed domains using the given header.

    :param header_name: Name of the HTTP header; it is matched case-insensitively.
    """
    title = f"Global usage of header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed."
    # The stored name is lower-cased by the query, so the compared literal must be lower-cased
    # too, otherwise a name like "X-Frame-Options" never matches. Single quotes are doubled
    # because the name comes from a downloaded file and is embedded into the SQL text.
    sql_header_name = header_name.lower().replace("'", "''")
    # Prevent the case in which a domain specify X times the same headers...
    query = f"select distinct domain from stats where lower(http_header_name) = '{sql_header_name}'"
    count_of_domains_using_the_header = len(execute_query_against_data_db(query))
    domains_count = get_domains_count()
    percentage_of_domains_using_the_header = (count_of_domains_using_the_header * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains_using_the_header),
                      ("Not using it", (100 - percentage_of_domains_using_the_header))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)


def compute_insecure_framing_configuration_global_usage():
    """Add the section showing the share of analyzed domains with an insecure 'x-frame-options' value.

    A value is considered insecure when it is neither `deny` nor `sameorigin` (case-insensitive).
    """
    header_name = "x-frame-options"
    title = f"Global usage of insecure framing configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure framing configuration: value different from `DENY` or `SAMEORIGIN` including unsupported values."
    # Count domains, not rows: a domain sending the header several times must weigh once,
    # as the result is divided by the number of distinct domains.
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not in ('deny','sameorigin')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)


def compute_hsts_preload_global_usage():
    """Add the section showing the share of analyzed domains using the HSTS 'preload' feature.

    "Using it" is the share of domains whose 'strict-transport-security' value contains
    "preload"; "Not using it" is the remaining share of the analyzed domains.
    """
    header_name = "strict-transport-security"
    title = "Global usage of the Strict Transport Security 'preload' feature"
    description = f"Provide the distribution of usage of the '[preload](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security#preloading_strict_transport_security)' feature for the header '{header_name}' across all domains analyzed."
    # Select the values that DO contain "preload": this count feeds the "Using it" slice.
    # Count domains, not rows, as the result is divided by the number of distinct domains.
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) like '%preload%'"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains),
                      ("Not using it", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)


def compute_secure_headers_global_usage():
    """Add the section showing the share of analyzed domains using secure headers.

    The "Not using them" slice is computed from the rows of the "stats" table
    having a NULL header name.
    """
    section_title = "Global usage of secure headers"
    section_description = "Provide the distribution of usage of secure headers across all domains analyzed."
    rows = execute_query_against_data_db("select count(domain) from stats where http_header_name is NULL")
    domains_without_headers = rows[0][0]
    total_domains = get_domains_count()
    share_without_headers = (domains_without_headers * 100) / total_domains
    slices = [
        ("Not using them", share_without_headers),
        ("Using them", (100 - share_without_headers)),
    ]
    add_stats_section(section_title, section_description, get_pie_chart_code(section_title, slices))


def compute_insecure_referrer_configuration_global_usage():
    """Add the section showing the share of analyzed domains with an insecure 'referrer-policy' value.

    A value is considered insecure when it is `unsafe-url` or `no-referrer-when-downgrade`
    (case-insensitive).
    """
    header_name = "referrer-policy"
    title = f"Global usage of insecure referrer configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure referrer configuration: value set to `unsafe-url` or `no-referrer-when-downgrade`.\n\n`no-referrer-when-downgrade` was included because it send origin, path, and querystring when the protocol security level stays the same (HTTPS is very often in place)."
    # Count domains, not rows: a domain sending the header several times must weigh once,
    # as the result is divided by the number of distinct domains.
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) in ('unsafe-url','no-referrer-when-downgrade')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)


def compute_hsts_average_maxage_global_usage():
    """Add the text-only section about the 'max-age' values of the Strict Transport Security header.

    The section gives the most common, the maximum and the minimum 'max-age' values found.
    When no value can be extracted, a single line stating it is written instead.
    """
    title = "Global common 'max-age' values of the Strict Transport Security header"
    query = "select lower(http_header_value) from stats where lower(http_header_name) = 'strict-transport-security' and lower(http_header_value) like '%max-age=%'"
    header_values = execute_query_against_data_db(query)
    expr = re.compile(r'max-age\s*=\s*(\-?"?\d+"?)')
    # Gather values for max-age attribute
    values = []
    for header_value in header_values:
        # Double quotes are removed so that the captured group is always a valid integer
        v = header_value[0].strip('\n\r\t').replace('"', '')
        match = expr.search(v)
        if match:
            values.append(int(match.group(1)))
    if not values:
        # max()/min() raise on an empty sequence
        add_stats_section(title, "* No 'max-age' value found across all domains analyzed.", None)
        return
    # Find the most popular one (on a tie, the first value encountered is kept)
    maxage_most_popular_value = Counter(values).most_common(1)[0][0]
    maxage_max_value = max(values)
    maxage_min_value = min(values)
    description = f"* Most common value used is {maxage_most_popular_value} seconds ({round(maxage_most_popular_value/60)} minutes) across all domains analyzed."
    description += f"\n* Maximum value used is {maxage_max_value} seconds ({round(maxage_max_value/60)} minutes) across all domains analyzed."
    description += f"\n* Minimum value used is {maxage_min_value} seconds ({round(maxage_min_value/60)} minutes) across all domains analyzed."
    add_stats_section(title, description, None)


def compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage():
    """Add the section showing the share of analyzed domains whose CSP allows unsafe expressions.

    Every header whose lower-cased name starts with 'content-security-policy' and whose value
    contains "unsafe" is evaluated with csp_contain_unsafe_expression().
    """
    header_name = "content-security-policy"
    title = "Global usage of content security policy with directives allowing unsafe expressions"
    description = "Provide the distribution of content security policy allowing unsafe expressions across all domains analyzed.\n\nDetermine if a CSP policy contains `(default-src|script-src|script-src-elem|script-src-attr|style-src)` directives using `(unsafe-inline|unsafe-hashes|unsafe-eval)` expressions.\n\nBased on [Report-URI CSP](https://report-uri.com/home/generate) generator allowed instructions for CSP directives."
    query = f"select domain, lower(http_header_value) from stats where lower(http_header_name) like '{header_name}%' and lower(http_header_value) like '%unsafe%'"
    # Collect domains in a set: a domain with several matching headers must weigh once,
    # as the result is divided by the number of distinct domains.
    domains_using_unsafe = {
        domain
        for domain, header_value in execute_query_against_data_db(query)
        if csp_contain_unsafe_expression(header_value)
    }
    count_of_domains = len(domains_using_unsafe)
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using unsafe", percentage_of_domains),
                      ("Not using unsafe", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)


if __name__ == "__main__":
    # Start from a clean image folder: remove the PNG files, then the MMD files
    image_folder = Path(IMAGE_FOLDER_LOCATION)
    for pattern in ("*.png", "*.mmd"):
        for stale_file in image_folder.glob(pattern):
            stale_file.unlink()
    # Header names are loaded before the markdown file is recreated
    oshp_headers = load_oshp_headers()
    init_stats_file()
    # Sections are appended in this exact order
    compute_secure_headers_global_usage()
    for header_name in oshp_headers:
        compute_header_global_usage(header_name)
    remaining_sections = (
        compute_insecure_framing_configuration_global_usage,
        compute_insecure_referrer_configuration_global_usage,
        compute_hsts_preload_global_usage,
        compute_hsts_average_maxage_global_usage,
        compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage,
    )
    for generate_section in remaining_sections:
        generate_section()
19 changes: 19 additions & 0 deletions ci/tab_stats_generate_png_files.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#########################################################################
# Generate the PNG image files from corresponding MMD (mermaid) files.
#
# Dependencies:
# https://github.com/mermaid-js/mermaid-cli
#########################################################################
# Constants
IMAGE_FOLDER_LOCATION="../assets/tab_stats_generated_images"
# Stop if the folder cannot be entered, otherwise the "rm" below would run in the caller's directory
cd "$IMAGE_FOLDER_LOCATION" || exit 1
# Make "*.mmd" expand to nothing (instead of the literal pattern) when there is no MMD file
shopt -s nullglob
# Generate images
for mmd_file in *.mmd
do
    png_file="${mmd_file%.mmd}.png"
    npx -p @mermaid-js/mermaid-cli mmdc --quiet --input "$mmd_file" --output "$png_file" --outputFormat png --theme default --backgroundColor transparent
done
# Keep only the PNG files ("-f": no error when there is nothing to remove)
rm -f -- *.mmd
cd - || exit 1
17 changes: 17 additions & 0 deletions ci/tab_stats_manage_generation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#########################################################################
# This script manages the generation/update of the tab represented by the
# file "tab_stats.md".
#########################################################################
# Stop at the first failing step so that a failed download or generation
# is reported to the caller instead of being silently ignored.
set -euo pipefail
DATA_DB_FILE_LOCATION="https://github.com/oshp/oshp-stats/raw/refs/heads/main/data/data.db"
DATA_DB_FILE="/tmp/data.db"
# Remove the downloaded database whatever the outcome of the script
cleanup() {
    echo "[+] Cleanup"
    rm -f "$DATA_DB_FILE"
}
trap cleanup EXIT
echo "[+] Download the database of headers analysis..."
wget -q -O "$DATA_DB_FILE" "$DATA_DB_FILE_LOCATION"
# Informative only: do not fail the generation if "file" is unavailable
file "$DATA_DB_FILE" || true
chmod +x tab_stats_generate_*
echo "[+] Generate the MD file of the TAB and all the MMD files for every pie chart image..."
python tab_stats_generate_md_file.py
echo "[+] Generate the PNG image corresponding to each MMD file..."
bash tab_stats_generate_png_files.sh
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests
Loading