|
| 1 | +#!/usr/bin/python3 |
| 2 | +""" |
| 3 | +Script using the gathered data from the OSHP project "oshp-stats" to generate/update the |
| 4 | +markdown file "tab_stats.md" with mermaid pie charts showing different statistics about HTTP security headers usage. |
| 5 | +
|
| 6 | +Source: |
| 7 | + https://mermaid-js.github.io/mermaid/#/pie |
| 8 | + https://github.com/oshp/oshp-stats/ |
| 9 | +""" |
| 10 | +import sqlite3 |
| 11 | +import re |
| 12 | +import requests |
| 13 | +import hashlib |
| 14 | +from collections import Counter |
| 15 | +from datetime import datetime |
| 16 | +from pathlib import Path |
| 17 | + |
# Constants

# Timeout passed to "requests.get()" when the OSHP headers list is downloaded.
HTTP_REQUEST_TIMEOUT = 60
# SQLite database file queried to compute every statistic.
DATA_DB_FILE = "/tmp/data.db"
# Remote JSON document listing the headers tracked by the OSHP project.
OSHP_SECURITY_HEADERS_FILE_lOCATION = "https://owasp.org/www-project-secure-headers/ci/headers_add.json"
# Markdown file (re)generated by this script.
MD_FILE = "../tab_stats.md"
# Folder receiving the ".mmd" files and referenced by the image links of the sections.
IMAGE_FOLDER_LOCATION = "../assets/tab_stats_generated_images"
# Header of the generated markdown file (front matter + "do not edit" notice).
TAB_MD_TEMPLATE = """---
title: statistics
displaytext: Statistics
layout: null
tab: true
order: 10
tags: headers
---

<!-- All the content of this file is generated by the script "ci/tab_stats_generate_md_file.py" -->

<!-- DO NOT EDIT IT MANUALLY -->

# Statistic about HTTP security response headers usage

<!-- markdown-link-check-disable -->

"""
# Section with a chart. The four "%s" placeholders are filled, in order, with:
# section title, section description, image alternative text, image file name.
# Only the image folder is resolved when the module is loaded (f-string).
SECTION_TEMPLATE = f"""
## %s

%s

![%s]({IMAGE_FOLDER_LOCATION}/%s)
"""
# Section without chart. The two "%s" placeholders are the title and the description.
SECTION_TEMPLATE_NO_MERMAID_CODE = """
## %s

%s
"""
| 54 | + |
| 55 | +# Utility functions |
| 56 | + |
| 57 | + |
def prepare_generation_of_image_from_mermaid(mermaid_code, filename):
    """Store mermaid code in "<IMAGE_FOLDER_LOCATION>/<filename>.mmd".

    The file is (over)written as UTF-8 text and a trailing line break is
    appended after the mermaid code.
    """
    mermaid_file_path = f"{IMAGE_FOLDER_LOCATION}/{filename}.mmd"
    with open(mermaid_file_path, mode="w", encoding="utf-8") as mermaid_file:
        mermaid_file.write(mermaid_code + "\n")
| 61 | + |
| 62 | + |
def load_oshp_headers():
    """Download the OSHP headers list and return the header names sorted.

    Returns:
        The "name" value of every entry of the "headers" array of the JSON
        document, in ascending order.

    Raises:
        Exception: when the HTTP response status code is not 200.
    """
    response = requests.get(OSHP_SECURITY_HEADERS_FILE_lOCATION, timeout=HTTP_REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise Exception(f"Status code {response.status_code} received!")
    return sorted(entry["name"] for entry in response.json()["headers"])
| 72 | + |
| 73 | + |
def execute_query_against_data_db(sql_query, parameters=(), db_file=None):
    """Run a SQL query against the data database and return all the rows.

    Args:
        sql_query: SQL statement to execute.
        parameters: Optional values bound to the placeholders of the
            statement (prefer them over building the SQL text by hand).
        db_file: Optional database file; DATA_DB_FILE is used when omitted.

    Returns:
        The list of rows (tuples) returned by "fetchall()".
    """
    from contextlib import closing

    database = DATA_DB_FILE if db_file is None else db_file
    # The sqlite3 connection context manager only commits/rolls back the
    # transaction: "closing" is needed to really release the connection.
    with closing(sqlite3.connect(database)) as connection, connection:
        with closing(connection.cursor()) as cursor:
            cursor.execute(sql_query, parameters)
            return cursor.fetchall()
| 80 | + |
| 81 | + |
def add_stats_section(title, description, chart_mermaid_code):
    """Append a statistics section at the end of the markdown file.

    When mermaid code is provided (not None and not blank), it is stored in a
    ".mmd" file named after the SHA-1 of the title and the section references
    the ".png" file having the same base name. Otherwise the section only
    contains the title and the description.
    """
    with open(MD_FILE, mode="a", encoding="utf-8") as md_file:
        has_chart = chart_mermaid_code is not None and chart_mermaid_code.strip() != ""
        if not has_chart:
            section = SECTION_TEMPLATE_NO_MERMAID_CODE % (title, description)
        else:
            # The title digest gives a file name without special characters
            image_basename = hashlib.sha1(title.encode("utf8")).hexdigest()
            prepare_generation_of_image_from_mermaid(chart_mermaid_code, image_basename)
            image_filename = f"{image_basename}.png"
            section = SECTION_TEMPLATE % (title, description, image_basename, image_filename)
        md_file.write(f"{section}\n")
| 91 | + |
| 92 | + |
def init_stats_file():
    """Create (or reset) the markdown file with its header.

    The header is made of the page template followed by a line giving the
    generation date and the number of domains analyzed.
    """
    with open(MD_FILE, mode="w", encoding="utf-8") as md_file:
        generation_date = datetime.now().strftime("%m/%d/%Y at %H:%M:%S")
        for chunk in (TAB_MD_TEMPLATE, "\n\n"):
            md_file.write(chunk)
        summary_line = f"⏲️ Last update: {generation_date} - Domains analyzed count: {get_domains_count()}.\n"
        md_file.write(summary_line)
| 99 | + |
| 100 | + |
def get_domains_count():
    """Return the number of distinct values of the "domain" column of the stats table."""
    distinct_domains = execute_query_against_data_db("select distinct domain from stats")
    return len(distinct_domains)
| 103 | + |
| 104 | + |
def get_pie_chart_code(title, dataset_tuples):
    """Build the mermaid code of a pie chart.

    Args:
        title: Currently not rendered in the chart (kept in the signature).
        dataset_tuples: Iterable of (label, value) items; each value is
            rounded to 2 decimals.

    Returns:
        The "pie" keyword followed by one tab-indented line per item.
    """
    # Note: Mermaid use integer value when rendering
    chart_lines = ["pie\n"]
    chart_lines.extend(
        f'\t"{entry[0]}" : {round(entry[1], 2)}\n' for entry in dataset_tuples
    )
    return "".join(chart_lines)
| 112 | + |
| 113 | + |
def csp_contain_unsafe_expression(csp_policy):
    """Tell if a CSP policy relies on unsafe expressions.

    A policy is flagged when:
    * a default-src / script-src / script-src-elem / script-src-attr directive
      contains unsafe-inline, unsafe-hashes or unsafe-eval;
    * a directive whose name starts with style-src contains unsafe-inline or
      unsafe-hashes.

    Based on "https://report-uri.com/home/generate" generator allowed
    instructions for CSP directives.

    Args:
        csp_policy: Value of the CSP header (any letter case).

    Returns:
        True when at least one directive matches, False otherwise.
    """
    all_unsafe_expressions = ("unsafe-inline", "unsafe-hashes", "unsafe-eval")
    style_unsafe_expressions = ("unsafe-inline", "unsafe-hashes")
    directives_allowing_all_unsafe_expressions = {
        "default-src", "script-src", "script-src-elem", "script-src-attr"}
    for directive in csp_policy.split(";"):
        # First token is the directive name, the remainder is its source list
        tokens = directive.lower().split(None, 1)
        if len(tokens) < 2:
            continue
        name, sources = tokens
        # The name is compared as a whole so a source (ex: an URL) that only
        # contains a directive name is not mistaken for that directive
        if name in directives_allowing_all_unsafe_expressions:
            searched_expressions = all_unsafe_expressions
        elif name.startswith("style-src"):
            searched_expressions = style_unsafe_expressions
        else:
            continue
        if any(expression in sources for expression in searched_expressions):
            return True
    return False
| 130 | + |
| 131 | + |
| 132 | +# Functions in charge of generating the stats sections |
| 133 | + |
| 134 | + |
def compute_header_global_usage(header_name):
    """Add the section giving the share of domains using a header.

    Args:
        header_name: Name of the HTTP header (any letter case).

    Note:
        The percentage is a division by get_domains_count(), so a
        ZeroDivisionError is raised when no domain is stored.
    """
    title = f"Global usage of header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed."
    # The column is compared in lower case so the name must be lowered too,
    # and single quotes are doubled as the name is embedded in a SQL literal
    sql_header_name = header_name.lower().replace("'", "''")
    # Prevent the case in which a domain specify X times the same headers...
    query = f"select distinct domain from stats where lower(http_header_name) = '{sql_header_name}'"
    count_of_domains_using_the_header = len(execute_query_against_data_db(query))
    domains_count = get_domains_count()
    percentage_of_domains_using_the_header = (count_of_domains_using_the_header * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains_using_the_header),
                      ("Not using it", (100 - percentage_of_domains_using_the_header))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
| 149 | + |
| 150 | + |
def compute_insecure_framing_configuration_global_usage():
    """Add the section giving the share of domains with an insecure "x-frame-options".

    A configuration is considered insecure when the header value, compared in
    lower case, is neither "deny" nor "sameorigin". The share is computed
    against all the domains analyzed.
    """
    header_name = "x-frame-options"
    title = f"Global usage of insecure framing configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure framing configuration: value different from `DENY` or `SAMEORIGIN` including unsupported values."
    # Count domains and not rows: a domain can specify X times the same header
    # and the ratio below is computed against a number of domains
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not in ('deny','sameorigin')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
| 163 | + |
| 164 | + |
def compute_hsts_preload_global_usage():
    """Add the section giving the share of domains using the HSTS "preload" feature.

    A domain uses the feature when one of its "strict-transport-security"
    header values contains "preload" (case insensitive). The share is computed
    against all the domains analyzed.
    """
    header_name = "strict-transport-security"
    title = "Global usage of the Strict Transport Security 'preload' feature"
    description = f"Provide the distribution of usage of the '[preload](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security#preloading_strict_transport_security)' feature for the header '{header_name}' across all domains analyzed."
    # Select the values CONTAINING "preload" as the result feeds the "Using it"
    # slice, and count domains (not rows) as the ratio is based on domains
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) like '%preload%'"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using it", percentage_of_domains),
                      ("Not using it", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
| 177 | + |
| 178 | + |
def compute_secure_headers_global_usage():
    """Add the section giving the share of domains using / not using secure headers.

    The "Not using them" slice is the number of stats rows having a NULL
    header name, expressed as a percentage of the domains analyzed.
    """
    title = "Global usage of secure headers"
    description = "Provide the distribution of usage of secure headers across all domains analyzed."
    rows = execute_query_against_data_db("select count(domain) from stats where http_header_name is NULL")
    domains_without_header = rows[0][0]
    total_domains = get_domains_count()
    share_not_using = (domains_without_header * 100) / total_domains
    slices = [
        ("Not using them", share_not_using),
        ("Using them", (100 - share_not_using)),
    ]
    add_stats_section(title, description, get_pie_chart_code(title, slices))
| 190 | + |
| 191 | + |
def compute_insecure_referrer_configuration_global_usage():
    """Add the section giving the share of domains with an insecure "referrer-policy".

    A configuration is considered insecure when the header value, compared in
    lower case, is "unsafe-url" or "no-referrer-when-downgrade". The share is
    computed against all the domains analyzed.
    """
    header_name = "referrer-policy"
    title = f"Global usage of insecure referrer configuration via the header '{header_name}'"
    description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure referrer configuration: value set to `unsafe-url` or `no-referrer-when-downgrade`.\n\n`no-referrer-when-downgrade` was included because it send origin, path, and querystring when the protocol security level stays the same (HTTPS is very often in place)."
    # Count domains and not rows: a domain can specify X times the same header
    # and the ratio below is computed against a number of domains
    query = f"select count(distinct domain) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) in ('unsafe-url','no-referrer-when-downgrade')"
    count_of_domains = execute_query_against_data_db(query)[0][0]
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Insecure conf", percentage_of_domains),
                      ("Secure conf", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
| 204 | + |
| 205 | + |
def compute_hsts_average_maxage_global_usage():
    """Add the section describing the HSTS "max-age" values in use.

    The section (without chart) gives the most common, the maximum and the
    minimum "max-age" values found in the "strict-transport-security" headers.
    When no value can be extracted, the section only states it instead of
    failing on an empty dataset.
    """
    title = "Global common 'max-age' values of the Strict Transport Security header"
    query = "select lower(http_header_value) from stats where lower(http_header_name) = 'strict-transport-security' and lower(http_header_value) like '%max-age=%'"
    header_values = execute_query_against_data_db(query)
    # Double quotes are removed from the value before the matching
    maxage_pattern = re.compile(r'max-age\s*=\s*(-?\d+)')
    # Gather values for max-age attribute
    values = []
    for header_value in header_values:
        cleaned_value = header_value[0].strip('\n\r\t').replace('"', '')
        match = maxage_pattern.search(cleaned_value)
        if match is not None:
            values.append(int(match.group(1)))
    if not values:
        # max()/min() raise a ValueError on an empty list
        add_stats_section(title, "* No 'max-age' value found across all domains analyzed.", None)
        return
    # Find the most popular one (first one met in case of equality)
    maxage_most_popular_value = Counter(values).most_common(1)[0][0]
    maxage_max_value = max(values)
    maxage_min_value = min(values)
    description = f"* Most common value used is {maxage_most_popular_value} seconds ({round(maxage_most_popular_value/60)} minutes) across all domains analyzed."
    description += f"\n* Maximum value used is {maxage_max_value} seconds ({round(maxage_max_value/60)} minutes) across all domains analyzed."
    description += f"\n* Minimum value used is {maxage_min_value} seconds ({round(maxage_min_value/60)} minutes) across all domains analyzed."
    add_stats_section(title, description, None)
| 230 | + |
| 231 | + |
def compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage():
    """Add the section giving the share of domains whose CSP allows unsafe expressions.

    Every header whose name starts with "content-security-policy" and whose
    value contains "unsafe" is checked with csp_contain_unsafe_expression().
    The share is computed against all the domains analyzed.
    """
    header_name = "content-security-policy"
    title = "Global usage of content security policy with directives allowing unsafe expressions"
    description = "Provide the distribution of content security policy allowing unsafe expressions across all domains analyzed.\n\nDetermine if a CSP policy contains `(default-src|script-src|script-src-elem|script-src-attr|style-src)` directives using `(unsafe-inline|unsafe-hashes|unsafe-eval)` expressions.\n\nBased on [Report-URI CSP](https://report-uri.com/home/generate) generator allowed instructions for CSP directives."
    query = f"select distinct domain, lower(http_header_value) from stats where lower(http_header_name) like '{header_name}%' and lower(http_header_value) like '%unsafe%'"
    records = execute_query_against_data_db(query)
    # A set is used to count a domain once even if several of its policies
    # (several headers or several header names matching the prefix) are unsafe
    domains_using_unsafe = {
        domain for domain, policy in records if csp_contain_unsafe_expression(policy)}
    count_of_domains = len(domains_using_unsafe)
    domains_count = get_domains_count()
    percentage_of_domains = (count_of_domains * 100) / domains_count
    dataset_tuples = [("Using unsafe", percentage_of_domains),
                      ("Not using unsafe", (100 - percentage_of_domains))]
    pie_chart_code = get_pie_chart_code(title, dataset_tuples)
    add_stats_section(title, description, pie_chart_code)
| 248 | + |
| 249 | + |
if __name__ == "__main__":
    # Remove the files left by the previous generation (images first, then mermaid sources)
    image_folder = Path(IMAGE_FOLDER_LOCATION)
    for pattern in ("*.png", "*.mmd"):
        for stale_file in image_folder.glob(pattern):
            stale_file.unlink()
    # The headers list is retrieved before the markdown file is reset
    oshp_headers = load_oshp_headers()
    init_stats_file()
    # Sections are appended in the order of the calls
    compute_secure_headers_global_usage()
    for header_name in oshp_headers:
        compute_header_global_usage(header_name)
    for generate_section in (
        compute_insecure_framing_configuration_global_usage,
        compute_insecure_referrer_configuration_global_usage,
        compute_hsts_preload_global_usage,
        compute_hsts_average_maxage_global_usage,
        compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage,
    ):
        generate_section()
0 commit comments