[ci skip] Add missing patch files

nielsdos · nielsdos · commit 775fb31d01bf · 2023-12-03T22:48:03.000+01:00
diff --git a/ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch b/ext/dom/lexbor/patches/0001-Remove-unused-upper-case-tag-static-data.patch
@@ -0,0 +1,53 @@
+From 24b52ec63eb55adb1c039e58dd3e1156f01083b2 Mon Sep 17 00:00:00 2001
+From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
+Date: Wed, 29 Nov 2023 21:26:47 +0100
+Subject: [PATCH 1/2] Remove unused upper case tag static data
+
+---
+ source/lexbor/tag/res.h | 2 ++
+ source/lexbor/tag/tag.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/source/lexbor/tag/res.h b/source/lexbor/tag/res.h
+index c7190c5..4ad1f37 100644
+--- a/source/lexbor/tag/res.h
++++ b/source/lexbor/tag/res.h
+@@ -224,6 +224,7 @@ static const lxb_tag_data_t lxb_tag_res_data_default[LXB_TAG__LAST_ENTRY] =
+     {{.u.short_str = "xmp", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
+ };
+ 
++#if 0
+ static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY] = 
+ {
+     {{.u.short_str = "#UNDEF", .length = 6, .next = NULL}, LXB_TAG__UNDEF, 1, true},
+@@ -423,6 +424,7 @@ static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY]
+     {{.u.short_str = "WBR", .length = 3, .next = NULL}, LXB_TAG_WBR, 1, true},
+     {{.u.short_str = "XMP", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
+ };
++#endif
+ 
+ static const lexbor_shs_entry_t lxb_tag_res_shs_data_default[] = 
+ {
+diff --git a/source/lexbor/tag/tag.c b/source/lexbor/tag/tag.c
+index f8fcdf0..0571957 100755
+--- a/source/lexbor/tag/tag.c
++++ b/source/lexbor/tag/tag.c
+@@ -92,6 +92,7 @@ lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t len)
+                                            lexbor_hash_search_lower, name, len);
+ }
+ 
++#if 0
+ const lxb_tag_data_t *
+ lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
+                            const lxb_char_t *name, size_t len)
+@@ -114,6 +115,7 @@ lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
+     return (const lxb_tag_data_t *) lexbor_hash_search(hash,
+                                            lexbor_hash_search_upper, name, len);
+ }
++#endif
+ 
+ /*
+  * No inline functions for ABI.
+-- 
+2.43.0
+
diff --git a/ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch b/ext/dom/lexbor/patches/0001-Shrink-size-of-static-binary-search-tree.patch
@@ -0,0 +1,115 @@
+From 7fde66f32dcfbdc5df97fbffe411c0d7fd60fa50 Mon Sep 17 00:00:00 2001
+From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
+Date: Wed, 29 Nov 2023 21:29:31 +0100
+Subject: [PATCH 2/2] Shrink size of static binary search tree
+
+This also makes lookups friendlier to the data cache, since each entry is smaller.
+---
+ source/lexbor/core/sbst.h                   | 10 +++++-----
+ source/lexbor/html/tokenizer/state.c        |  2 +-
+ utils/lexbor/html/tmp/tokenizer_res.h       |  2 +-
+ utils/lexbor/html/tokenizer_entities_bst.py |  8 ++++----
+ utils/lexbor/lexbor/LXB.py                  |  2 +-
+ 5 files changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h
+index b0fbc54..40e0e91 100755
+--- a/source/lexbor/core/sbst.h
++++ b/source/lexbor/core/sbst.h
+@@ -19,12 +19,12 @@ extern "C" {
+ typedef struct {
+     lxb_char_t key;
+ 
+-    void       *value;
+-    size_t     value_len;
++    lxb_char_t         value[6];
++    unsigned char      value_len;
+ 
+-    size_t     left;
+-    size_t     right;
+-    size_t     next;
++    unsigned short     left;
++    unsigned short     right;
++    unsigned short     next;
+ }
+ lexbor_sbst_entry_static_t;
+ 
+diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c
+index 70ca391..2f3414f 100755
+--- a/source/lexbor/html/tokenizer/state.c
++++ b/source/lexbor/html/tokenizer/state.c
+@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
+             goto done;
+         }
+ 
+-        if (entry->value != NULL) {
++        if (entry->value[0] != 0) {
+             tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
+             tkz->entity_match = entry;
+         }
+diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h
+index b3701d5..73ab66e 100755
+--- a/utils/lexbor/html/tmp/tokenizer_res.h
++++ b/utils/lexbor/html/tmp/tokenizer_res.h
+@@ -6,7 +6,7 @@
+ 
+ /*
+  * Caution!!! Important!!!
+- * This file generated by the script
++ * This file is generated by the script
+  * "utils/lexbor/html/tokenizer_entities_bst.py"!
+  * Do not change this file!
+  */
+diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py
+index ee7dcb4..7cd1335 100755
+--- a/utils/lexbor/html/tokenizer_entities_bst.py
++++ b/utils/lexbor/html/tokenizer_entities_bst.py
+@@ -1,6 +1,6 @@
+ 
+ import json
+-import sys, re, os
++import sys, os
+ 
+ # Find and append run script run dir to module search path
+ ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index):
+ 
+ def entities_bst_create(index):
+     bst = {}
+-    bst[0] = ["\0", 0, 0, 0, "NULL"]
++    bst[0] = ["\0", 0, 0, 0, "{0}"]
+ 
+     begin = 1
+     idx = end = entities_bst_create_tree(index, bst, begin)
+@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx):
+         assert len(index[ split[0] ]['values']) < 2, 'Double values'
+ 
+         if len(index[ split[0] ]['values']) == 0:
+-            value = "NULL"
++            value = "{0}"
+         else:
+             value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters']))
+ 
+@@ -210,5 +210,5 @@ def entities_bst_print(bst):
+ 
+ if __name__ == "__main__":
+     entities_bst("tmp/tokenizer_res.h",
+-                 "../../../source/lexbor/html/tokenizer_res.h",
++                 "../../../source/lexbor/html/tokenizer/res.h",
+                  "data/entities.json");
+diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py
+index 3e75812..b068ea3 100755
+--- a/utils/lexbor/lexbor/LXB.py
++++ b/utils/lexbor/lexbor/LXB.py
+@@ -27,7 +27,7 @@ class Temp:
+ 
+         for line in fh:
+             for name in self.patterns:
+-                line = re.sub(name, '\n'.join(self.patterns[name]), line)
++                line = line.replace(name, '\n'.join(self.patterns[name]))
+             self.buffer.append(line)
+         fh.close()
+ 
+-- 
+2.43.0
+