Merge pull request #217 from righettod/master

riramar · web-flow · commit 68f6c9d94b23 · 2025-02-01T20:36:31.000-03:00
Add stats tab.
diff --git a/.github/workflows/tab-stats-headers-generate-related-files.yml b/.github/workflows/tab-stats-headers-generate-related-files.yml
@@ -0,0 +1,29 @@
+name: update_tab_stats_related_files
+on:
+  workflow_dispatch:
+  push:
+  schedule:
+    - cron: '0 0 3 * *'
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write    
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        sudo apt install wget
+    - name: Run update of the tab related files
+      run: |
+        cd ci; bash tab_stats_manage_generation.sh
+    - name: Set up Git user
+      run: git config --global user.email "gha@github.com"; git config --global user.name "GHActionBot"
+    - name: Commit update
+      run: git add --all; git commit -am "Sync tab stats related files"; git push
diff --git a/assets/tab_stats_generated_images/.keep b/assets/tab_stats_generated_images/.keep
@@ -0,0 +1 @@
+NEVER DELETE ME!!!!
diff --git a/ci/tab_stats_generate_md_file.py b/ci/tab_stats_generate_md_file.py
@@ -0,0 +1,264 @@
+#!/usr/bin/python3
+"""
+Script using the gathered data from the OSHP project "oshp-stats" to generate/update the 
+markdown file "tab_stats.md" with mermaid pie charts with differents statistics about HTTP security headers usage.
+
+Source:
+    https://mermaid-js.github.io/mermaid/#/pie
+    https://github.com/oshp/oshp-stats/
+"""
+import sqlite3
+import re
+import requests
+import hashlib
+from collections import Counter
+from datetime import datetime
+from pathlib import Path
+
+# Constants
+HTTP_REQUEST_TIMEOUT = 60
+DATA_DB_FILE = "/tmp/data.db"
+OSHP_SECURITY_HEADERS_FILE_lOCATION = "https://owasp.org/www-project-secure-headers/ci/headers_add.json"
+MD_FILE = "../tab_stats.md"
+IMAGE_FOLDER_LOCATION = "../assets/tab_stats_generated_images"
+TAB_MD_TEMPLATE = """---
+title: statistics
+displaytext: Statistics
+layout: null
+tab: true
+order: 10
+tags: headers
+---
+
+<!-- All the content of this file is generated by the script "ci/tab_stats_generate_md_file.py" -->
+
+<!-- DO NOT EDIT IT MANUALLY -->
+
+# Statistic about HTTP security response headers usage
+
+<!-- markdown-link-check-disable -->
+
+"""
+SECTION_TEMPLATE = f"""
+## %s
+
+%s
+
+![%s]({IMAGE_FOLDER_LOCATION.replace('../', '')}/%s)
+"""
+SECTION_TEMPLATE_NO_MERMAID_CODE = """
+## %s
+
+%s
+"""
+
+# Utility functions
+
+
+def prepare_generation_of_image_from_mermaid(mermaid_code, filename):
+    with open(f"{IMAGE_FOLDER_LOCATION}/{filename}.mmd", "w", encoding="utf-8") as f:
+        f.write(mermaid_code + "\n")
+
+
+def load_oshp_headers():
+    header_names = []
+    resp = requests.get(OSHP_SECURITY_HEADERS_FILE_lOCATION, timeout=HTTP_REQUEST_TIMEOUT)
+    if resp.status_code != 200:
+        raise Exception(f"Status code {resp.status_code} received!")
+    for http_header in resp.json()["headers"]:
+        header_names.append(http_header["name"])
+    header_names.sort()
+    return header_names
+
+
+def execute_query_against_data_db(sql_query):
+    with sqlite3.connect(DATA_DB_FILE) as connection:
+        curs = connection.cursor()
+        curs.execute(sql_query)
+        records = curs.fetchall()
+        return records
+
+
+def add_stats_section(title, description, chart_mermaid_code):
+    with open(MD_FILE, mode="a", encoding="utf-8") as f:
+        if chart_mermaid_code is not None and len(chart_mermaid_code.strip()) > 0:
+            base_image_filename = hashlib.sha1(title.encode("utf8")).hexdigest()
+            prepare_generation_of_image_from_mermaid(chart_mermaid_code, base_image_filename)
+            md_code = SECTION_TEMPLATE % (title, description, base_image_filename, f"{base_image_filename}.png")
+        else:
+            md_code = SECTION_TEMPLATE_NO_MERMAID_CODE % (title, description)
+        f.write(f"{md_code}\n")
+
+
+def init_stats_file():
+    with open(MD_FILE, mode="w", encoding="utf-8") as f:
+        cdate = datetime.now().strftime("%m/%d/%Y at %H:%M:%S")
+        f.write(TAB_MD_TEMPLATE)
+        f.write("\n\n")
+        f.write(f"⏲️ Last update: {cdate} - Domains analyzed count: {get_domains_count()}.\n")
+
+
+def get_domains_count():
+    return len(execute_query_against_data_db("select distinct domain from stats"))
+
+
+def get_pie_chart_code(title, dataset_tuples):
+    # code = f"pie title {title}\n"
+    code = f"pie\n"
+    for dataset_tuple in dataset_tuples:
+        # Note: Mermaid use integer value when rendering
+        code += f"\t\"{dataset_tuple[0]}\" : {round(dataset_tuple[1], 2)}\n"
+    return code
+
+
+def csp_contain_unsafe_expression(csp_policy):
+    contain_unsafe_expression = False
+    # Determine if a CSP policy contains (default-src|script-src|script-src-elem|script-src-attr|style-src) directives using (unsafe-inline|unsafe-hashes|unsafe-eval) expressions
+    # Based on "https://report-uri.com/home/generate" generator allowed instructions for CSP directives
+    exp_all_unsafe_expressions = r'(unsafe-inline|unsafe-hashes|unsafe-eval)'
+    exp_style_unsafe_expressions = r'(unsafe-inline|unsafe-hashes)'
+    exp_directive_name_allowing_all_unsafe_expressions = r'(default-src|script-src|script-src-elem|script-src-attr)'
+    directives = csp_policy.split(";")
+    for directive in directives:
+        if len(re.findall(exp_directive_name_allowing_all_unsafe_expressions, directive)) > 0 and len(re.findall(exp_all_unsafe_expressions, directive)) > 0:
+            contain_unsafe_expression = True
+            break
+        elif directive.strip().startswith("style-src") and len(re.findall(exp_style_unsafe_expressions, directive)) > 0:
+            contain_unsafe_expression = True
+            break
+    return contain_unsafe_expression
+
+
+# Functions in charge of generate stats sections
+
+
+def compute_header_global_usage(header_name):
+    title = f"Global usage of header '{header_name}'"
+    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed."
+    # Prevent the case in which a domain specify X times the same headers...
+    query = f"select distinct domain from stats where lower(http_header_name) = '{header_name}'"
+    count_of_domains_using_the_header = len(
+        execute_query_against_data_db(query))
+    domains_count = get_domains_count()
+    percentage_of_domains_using_the_header = (
+        count_of_domains_using_the_header * 100) / domains_count
+    dataset_tuples = [("Using it", percentage_of_domains_using_the_header),
+                      ("Not using it", (100-percentage_of_domains_using_the_header))]
+    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
+    add_stats_section(title, description, pie_chart_code)
+
+
+def compute_insecure_framing_configuration_global_usage():
+    header_name = "x-frame-options"
+    title = f"Global usage of insecure framing configuration via the header '{header_name}'"
+    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure framing configuration: value different from `DENY` or `SAMEORIGIN` including unsupported values."
+    query = f"select count(*) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not in ('deny','sameorigin')"
+    count_of_domains = execute_query_against_data_db(query)[0][0]
+    domains_count = get_domains_count()
+    percentage_of_domains = (count_of_domains * 100) / domains_count
+    dataset_tuples = [("Insecure conf", percentage_of_domains),
+                      ("Secure conf", (100-percentage_of_domains))]
+    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
+    add_stats_section(title, description, pie_chart_code)
+
+
+def compute_hsts_preload_global_usage():
+    header_name = "strict-transport-security"
+    title = "Global usage of the Strict Transport Security 'preload' feature"
+    description = f"Provide the distribution of usage of the '[preload](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security#preloading_strict_transport_security)' feature for the header '{header_name}' across all domains analyzed."
+    query = f"select count(*) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not like '%preload%'"
+    count_of_domains = execute_query_against_data_db(query)[0][0]
+    domains_count = get_domains_count()
+    percentage_of_domains = (count_of_domains * 100) / domains_count
+    dataset_tuples = [("Using it", percentage_of_domains),
+                      ("Not using it", (100-percentage_of_domains))]
+    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
+    add_stats_section(title, description, pie_chart_code)
+
+
+def compute_secure_headers_global_usage():
+    title = "Global usage of secure headers"
+    description = f"Provide the distribution of usage of secure headers across all domains analyzed."
+    query = "select count(domain) from stats where http_header_name is NULL"
+    count_of_domains = execute_query_against_data_db(query)[0][0]
+    domains_count = get_domains_count()
+    percentage_of_domains = (count_of_domains * 100) / domains_count
+    dataset_tuples = [("Not using them", percentage_of_domains),
+                      ("Using them", (100-percentage_of_domains))]
+    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
+    add_stats_section(title, description, pie_chart_code)
+
+
+def compute_insecure_referrer_configuration_global_usage():
+    header_name = "referrer-policy"
+    title = f"Global usage of insecure referrer configuration via the header '{header_name}'"
+    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure referrer configuration: value set to `unsafe-url` or `no-referrer-when-downgrade`.\n\n`no-referrer-when-downgrade` was included because it send origin, path, and querystring when the protocol security level stays the same (HTTPS is very often in place)."
+    query = f"select count(*) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) in ('unsafe-url','no-referrer-when-downgrade')"
+    count_of_domains = execute_query_against_data_db(query)[0][0]
+    domains_count = get_domains_count()
+    percentage_of_domains = (count_of_domains * 100) / domains_count
+    dataset_tuples = [("Insecure conf", percentage_of_domains),
+                      ("Secure conf", (100-percentage_of_domains))]
+    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
+    add_stats_section(title, description, pie_chart_code)
+
+
+def compute_hsts_average_maxage_global_usage():
+    title = "Global common 'max-age' values of the Strict Transport Security header"
+    query = "select lower(http_header_value) from stats where lower(http_header_name) = 'strict-transport-security' and lower(http_header_value) like '%max-age=%'"
+    header_values = execute_query_against_data_db(query)
+    expr = r'max-age\s*=\s*(\-?"?\d+"?)'
+    # Gather values for max-age attribute
+    values = []
+    for header_value in header_values:
+        v = header_value[0].strip('\n\r\t').replace('"', '')
+        matches = re.findall(expr, v)
+        if len(matches) > 0:
+            values.append(int(matches[0]))
+    # Find the most popular one
+    occurences = Counter(values)
+    maxage_most_popular_value = 0
+    current_max_occurence_count = 0
+    for maxage_value, occurence_count in occurences.items():
+        if occurence_count > current_max_occurence_count:
+            current_max_occurence_count = occurence_count
+            maxage_most_popular_value = maxage_value
+    description = f"* Most common value used is {maxage_most_popular_value} seconds ({round(maxage_most_popular_value/60)} minutes) across all domains analyzed."
+    description += f"\n* Maximum value used is {max(values)} seconds ({round(max(values)/60)} minutes) across all domains analyzed."
+    description += f"\n* Minimum value used is {min(values)} seconds ({round(min(values)/60)} minutes) across all domains analyzed."
+    add_stats_section(title, description, None)
+
+
+def compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage():
+    header_name = "content-security-policy"
+    title = f"Global usage of content security policy with directives allowing unsafe expressions"
+    description = f"Provide the distribution of content security policy allowing unsafe expressions across all domains analyzed.\n\nDetermine if a CSP policy contains `(default-src|script-src|script-src-elem|script-src-attr|style-src)` directives using `(unsafe-inline|unsafe-hashes|unsafe-eval)` expressions.\n\nBased on [Report-URI CSP](https://report-uri.com/home/generate) generator allowed instructions for CSP directives."
+    query = f"select lower(http_header_value) from stats where lower(http_header_name) like '{header_name}%' and lower(http_header_value) like '%unsafe%'"
+    header_values = execute_query_against_data_db(query)
+    count_of_domains = 0
+    for header_value in header_values:
+        if csp_contain_unsafe_expression(header_value[0]):
+            count_of_domains += 1
+    domains_count = get_domains_count()
+    percentage_of_domains = (count_of_domains * 100) / domains_count
+    dataset_tuples = [("Using unsafe", percentage_of_domains),
+                      ("Not using unsafe", (100-percentage_of_domains))]
+    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
+    add_stats_section(title, description, pie_chart_code)
+
+
+if __name__ == "__main__":
+    for path in Path(IMAGE_FOLDER_LOCATION).glob("*.png"):
+        path.unlink()
+    for path in Path(IMAGE_FOLDER_LOCATION).glob("*.mmd"):
+        path.unlink()
+    oshp_headers = load_oshp_headers()
+    init_stats_file()
+    compute_secure_headers_global_usage()
+    for header_name in oshp_headers:
+        compute_header_global_usage(header_name)
+    compute_insecure_framing_configuration_global_usage()
+    compute_insecure_referrer_configuration_global_usage()
+    compute_hsts_preload_global_usage()
+    compute_hsts_average_maxage_global_usage()
+    compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage()
diff --git a/ci/tab_stats_generate_png_files.sh b/ci/tab_stats_generate_png_files.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#########################################################################
+# Generate the PNG image files from corresponding MMD (mermaid) files.
+#
+# Dependencies:
+#   https://github.com/mermaid-js/mermaid-cli
+#########################################################################
+# Constants
+IMAGE_FOLDER_LOCATION="../assets/tab_stats_generated_images"
+# Generate images
+cd $IMAGE_FOLDER_LOCATION
+for mmd_file in *.mmd
+do
+    png_file="${mmd_file%%.*}.png"
+    npx -p @mermaid-js/mermaid-cli mmdc --quiet --input $mmd_file --output $png_file --outputFormat png --theme default --backgroundColor transparent
+done
+# Only let PNG files
+rm *.mmd
+cd -
diff --git a/ci/tab_stats_manage_generation.sh b/ci/tab_stats_manage_generation.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#########################################################################
+# This script manage the generation/update of the tab represented by the 
+# file "tab_stats.md".
+#########################################################################
+DATA_DB_FILE_LOCATION="https://github.com/oshp/oshp-stats/raw/refs/heads/main/data/data.db"
+DATA_DB_FILE="/tmp/data.db"
+echo "[+] Download the database of headers analysis..."
+wget -q -O $DATA_DB_FILE $DATA_DB_FILE_LOCATION
+file $DATA_DB_FILE
+chmod +x tab_stats_generate_*
+echo "[+] Generate the MD file of the TAB and all the MMD files for every pie chart image..."
+python tab_stats_generate_md_file.py
+echo "[+] Generate the PNG image corresponding to each MMD file..."
+bash tab_stats_generate_png_files.sh
+echo "[+] Cleanup"
+rm $DATA_DB_FILE
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+requests
diff --git a/tab_stats.md b/tab_stats.md