Skip to content

Commit 0ab0d87

Browse files
committed
recipes: add patches to the python3 recipes for reproducible .pyc generation
Patches taken from python/cpython#27926 and python/cpython#8226; both are included upstream in Python 3.11.
1 parent d4432ec commit 0ab0d87

File tree

6 files changed

+264
-2
lines changed

6 files changed

+264
-2
lines changed

pythonforandroid/recipes/hostpython3/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from os.path import join
77

88
from pythonforandroid.logger import shprint
9+
from pythonforandroid.patching import version_starts_with
910
from pythonforandroid.recipe import Recipe
1011
from pythonforandroid.util import (
1112
BuildInterruptingException,
@@ -46,7 +47,11 @@ class HostPython3Recipe(Recipe):
4647
'''The default url to download our host python recipe. This url will
4748
change depending on the python version set in attribute :attr:`version`.'''
4849

49-
patches = ['patches/pyconfig_detection.patch']
50+
patches = [
51+
'patches/pyconfig_detection.patch',
52+
('patches/py3.10_reproducible-pyc.diff', version_starts_with("3.10")),
53+
('patches/py3.10_reproducible-marshal_flagref.patch', version_starts_with("3.10")),
54+
]
5055

5156
@property
5257
def _exe_name(self):
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
From 6c8ea7c1dacd42f3ba00440231ec0e6b1a38300d Mon Sep 17 00:00:00 2001
2+
From: Inada Naoki <[email protected]>
3+
Date: Sat, 14 Jul 2018 00:46:11 +0900
4+
Subject: [PATCH] Use FLAG_REF always for interned strings
5+
6+
---
7+
Python/marshal.c | 9 +++++++--
8+
1 file changed, 7 insertions(+), 2 deletions(-)
9+
10+
diff --git a/Python/marshal.c b/Python/marshal.c
11+
index 6d06266c6a8e2e..51db2e3b2e29a2 100644
12+
--- a/Python/marshal.c
13+
+++ b/Python/marshal.c
14+
@@ -275,9 +275,14 @@ w_ref(PyObject *v, char *flag, WFILE *p)
15+
if (p->version < 3 || p->hashtable == NULL)
16+
return 0; /* not writing object references */
17+
18+
- /* if it has only one reference, it definitely isn't shared */
19+
- if (Py_REFCNT(v) == 1)
20+
+ /* If it has only one reference, it definitely isn't shared.
21+
+ * But we always use TYPE_REF for interned strings, to keep the PYC file as
22+
+ * stable as possible.
23+
+ */
24+
+ if (Py_REFCNT(v) == 1 &&
25+
+ !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
26+
return 0;
27+
+ }
28+
29+
entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
30+
if (entry != NULL) {
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
From 36ae9beb04763d498df2114657bfbbcfe58bf913 Mon Sep 17 00:00:00 2001
2+
From: Brandt Bucher <[email protected]>
3+
Date: Mon, 23 Aug 2021 18:34:17 -0700
4+
Subject: [PATCH] Serialize frozenset elements deterministically
5+
6+
---
7+
Lib/test/test_marshal.py | 25 +++++++++++++++
8+
.../2021-08-23-21-39-59.bpo-37596.ojRcwB.rst | 2 ++
9+
Python/marshal.c | 32 +++++++++++++++++++
10+
3 files changed, 59 insertions(+)
11+
create mode 100644 Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst
12+
13+
--- a/Lib/test/test_marshal.py
14+
+++ b/Lib/test/test_marshal.py
15+
@@ -1,5 +1,6 @@
16+
from test import support
17+
from test.support import os_helper
18+
+from test.support.script_helper import assert_python_ok
19+
import array
20+
import io
21+
import marshal
22+
@@ -318,6 +319,31 @@ class BugsTestCase(unittest.TestCase):
23+
for i in range(len(data)):
24+
self.assertRaises(EOFError, marshal.loads, data[0: i])
25+
26+
+ def test_deterministic_sets(self):
27+
+ # bpo-37596: To support reproducible builds, sets and frozensets need to
28+
+ # have their elements serialized in a consistent order (even when they
29+
+ # have been scrambled by hash randomization):
30+
+ for kind in ("set", "frozenset"):
31+
+ for elements in (
32+
+ "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'",
33+
+ # Also test for bad interactions with backreferencing:
34+
+ "('string', 1), ('string', 2), ('string', 3)",
35+
+ ):
36+
+ s = f"{kind}([{elements}])"
37+
+ with self.subTest(s):
38+
+ # First, make sure that our test case still has different
39+
+ # orders under hash seeds 0 and 1. If this check fails, we
40+
+ # need to update this test with different elements:
41+
+ args = ["-c", f"print({s})"]
42+
+ _, repr_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0")
43+
+ _, repr_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1")
44+
+ self.assertNotEqual(repr_0, repr_1)
45+
+ # Then, perform the actual test:
46+
+ args = ["-c", f"import marshal; print(marshal.dumps({s}))"]
47+
+ _, dump_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0")
48+
+ _, dump_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1")
49+
+ self.assertEqual(dump_0, dump_1)
50+
+
51+
LARGE_SIZE = 2**31
52+
pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4
53+
54+
--- a/Python/marshal.c
55+
+++ b/Python/marshal.c
56+
@@ -502,9 +502,41 @@ w_complex_object(PyObject *v, char flag,
57+
W_TYPE(TYPE_SET, p);
58+
n = PySet_GET_SIZE(v);
59+
W_SIZE(n, p);
60+
+ // bpo-37596: To support reproducible builds, sets and frozensets need
61+
+ // to have their elements serialized in a consistent order (even when
62+
+ // they have been scrambled by hash randomization). To ensure this, we
63+
+ // use an order equivalent to sorted(v, key=marshal.dumps):
64+
+ PyObject *pairs = PyList_New(0);
65+
+ if (pairs == NULL) {
66+
+ p->error = WFERR_NOMEMORY;
67+
+ return;
68+
+ }
69+
while (_PySet_NextEntry(v, &pos, &value, &hash)) {
70+
+ PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
71+
+ if (dump == NULL) {
72+
+ p->error = WFERR_UNMARSHALLABLE;
73+
+ goto anyset_done;
74+
+ }
75+
+ PyObject *pair = PyTuple_Pack(2, dump, value);
76+
+ Py_DECREF(dump);
77+
+ if (pair == NULL || PyList_Append(pairs, pair)) {
78+
+ p->error = WFERR_NOMEMORY;
79+
+ Py_XDECREF(pair);
80+
+ goto anyset_done;
81+
+ }
82+
+ Py_DECREF(pair);
83+
+ }
84+
+ if (PyList_Sort(pairs)) {
85+
+ p->error = WFERR_NOMEMORY;
86+
+ goto anyset_done;
87+
+ }
88+
+ for (Py_ssize_t i = 0; i < n; i++) {
89+
+ PyObject *pair = PyList_GET_ITEM(pairs, i);
90+
+ value = PyTuple_GET_ITEM(pair, 1);
91+
w_object(value, p);
92+
}
93+
+ anyset_done:
94+
+ Py_DECREF(pairs);
95+
}
96+
else if (PyCode_Check(v)) {
97+
PyCodeObject *co = (PyCodeObject *)v;

pythonforandroid/recipes/python3/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,10 @@ class Python3Recipe(TargetPythonRecipe):
7171
# Python 3.8.1, 3.9.X & 3.10.X
7272
('patches/py3.8.1.patch', version_starts_with("3.8")),
7373
('patches/py3.8.1.patch', version_starts_with("3.9")),
74-
('patches/py3.8.1.patch', version_starts_with("3.10"))
74+
('patches/py3.8.1.patch', version_starts_with("3.10")),
75+
76+
('patches/py3.10_reproducible-pyc.diff', version_starts_with("3.10")),
77+
('patches/py3.10_reproducible-marshal_flagref.patch', version_starts_with("3.10")),
7578
]
7679

7780
if shutil.which('lld') is not None:
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
From 6c8ea7c1dacd42f3ba00440231ec0e6b1a38300d Mon Sep 17 00:00:00 2001
2+
From: Inada Naoki <[email protected]>
3+
Date: Sat, 14 Jul 2018 00:46:11 +0900
4+
Subject: [PATCH] Use FLAG_REF always for interned strings
5+
6+
---
7+
Python/marshal.c | 9 +++++++--
8+
1 file changed, 7 insertions(+), 2 deletions(-)
9+
10+
diff --git a/Python/marshal.c b/Python/marshal.c
11+
index 6d06266c6a8e2e..51db2e3b2e29a2 100644
12+
--- a/Python/marshal.c
13+
+++ b/Python/marshal.c
14+
@@ -275,9 +275,14 @@ w_ref(PyObject *v, char *flag, WFILE *p)
15+
if (p->version < 3 || p->hashtable == NULL)
16+
return 0; /* not writing object references */
17+
18+
- /* if it has only one reference, it definitely isn't shared */
19+
- if (Py_REFCNT(v) == 1)
20+
+ /* If it has only one reference, it definitely isn't shared.
21+
+ * But we always use TYPE_REF for interned strings, to keep the PYC file as
22+
+ * stable as possible.
23+
+ */
24+
+ if (Py_REFCNT(v) == 1 &&
25+
+ !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
26+
return 0;
27+
+ }
28+
29+
entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
30+
if (entry != NULL) {
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
From 36ae9beb04763d498df2114657bfbbcfe58bf913 Mon Sep 17 00:00:00 2001
2+
From: Brandt Bucher <[email protected]>
3+
Date: Mon, 23 Aug 2021 18:34:17 -0700
4+
Subject: [PATCH] Serialize frozenset elements deterministically
5+
6+
---
7+
Lib/test/test_marshal.py | 25 +++++++++++++++
8+
.../2021-08-23-21-39-59.bpo-37596.ojRcwB.rst | 2 ++
9+
Python/marshal.c | 32 +++++++++++++++++++
10+
3 files changed, 59 insertions(+)
11+
create mode 100644 Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst
12+
13+
--- a/Lib/test/test_marshal.py
14+
+++ b/Lib/test/test_marshal.py
15+
@@ -1,5 +1,6 @@
16+
from test import support
17+
from test.support import os_helper
18+
+from test.support.script_helper import assert_python_ok
19+
import array
20+
import io
21+
import marshal
22+
@@ -318,6 +319,31 @@ class BugsTestCase(unittest.TestCase):
23+
for i in range(len(data)):
24+
self.assertRaises(EOFError, marshal.loads, data[0: i])
25+
26+
+ def test_deterministic_sets(self):
27+
+ # bpo-37596: To support reproducible builds, sets and frozensets need to
28+
+ # have their elements serialized in a consistent order (even when they
29+
+ # have been scrambled by hash randomization):
30+
+ for kind in ("set", "frozenset"):
31+
+ for elements in (
32+
+ "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'",
33+
+ # Also test for bad interactions with backreferencing:
34+
+ "('string', 1), ('string', 2), ('string', 3)",
35+
+ ):
36+
+ s = f"{kind}([{elements}])"
37+
+ with self.subTest(s):
38+
+ # First, make sure that our test case still has different
39+
+ # orders under hash seeds 0 and 1. If this check fails, we
40+
+ # need to update this test with different elements:
41+
+ args = ["-c", f"print({s})"]
42+
+ _, repr_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0")
43+
+ _, repr_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1")
44+
+ self.assertNotEqual(repr_0, repr_1)
45+
+ # Then, perform the actual test:
46+
+ args = ["-c", f"import marshal; print(marshal.dumps({s}))"]
47+
+ _, dump_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0")
48+
+ _, dump_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1")
49+
+ self.assertEqual(dump_0, dump_1)
50+
+
51+
LARGE_SIZE = 2**31
52+
pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4
53+
54+
--- a/Python/marshal.c
55+
+++ b/Python/marshal.c
56+
@@ -502,9 +502,41 @@ w_complex_object(PyObject *v, char flag,
57+
W_TYPE(TYPE_SET, p);
58+
n = PySet_GET_SIZE(v);
59+
W_SIZE(n, p);
60+
+ // bpo-37596: To support reproducible builds, sets and frozensets need
61+
+ // to have their elements serialized in a consistent order (even when
62+
+ // they have been scrambled by hash randomization). To ensure this, we
63+
+ // use an order equivalent to sorted(v, key=marshal.dumps):
64+
+ PyObject *pairs = PyList_New(0);
65+
+ if (pairs == NULL) {
66+
+ p->error = WFERR_NOMEMORY;
67+
+ return;
68+
+ }
69+
while (_PySet_NextEntry(v, &pos, &value, &hash)) {
70+
+ PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
71+
+ if (dump == NULL) {
72+
+ p->error = WFERR_UNMARSHALLABLE;
73+
+ goto anyset_done;
74+
+ }
75+
+ PyObject *pair = PyTuple_Pack(2, dump, value);
76+
+ Py_DECREF(dump);
77+
+ if (pair == NULL || PyList_Append(pairs, pair)) {
78+
+ p->error = WFERR_NOMEMORY;
79+
+ Py_XDECREF(pair);
80+
+ goto anyset_done;
81+
+ }
82+
+ Py_DECREF(pair);
83+
+ }
84+
+ if (PyList_Sort(pairs)) {
85+
+ p->error = WFERR_NOMEMORY;
86+
+ goto anyset_done;
87+
+ }
88+
+ for (Py_ssize_t i = 0; i < n; i++) {
89+
+ PyObject *pair = PyList_GET_ITEM(pairs, i);
90+
+ value = PyTuple_GET_ITEM(pair, 1);
91+
w_object(value, p);
92+
}
93+
+ anyset_done:
94+
+ Py_DECREF(pairs);
95+
}
96+
else if (PyCode_Check(v)) {
97+
PyCodeObject *co = (PyCodeObject *)v;

0 commit comments

Comments
 (0)