Skip to content

Commit 8714b6f

Browse files
bpo-46881: Statically allocate and initialize the latin1 characters. (GH-31616)
1 parent e801e88 commit 8714b6f

File tree

6 files changed

+317
-66
lines changed

6 files changed

+317
-66
lines changed

Include/internal/pycore_global_strings.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,14 @@ struct _Py_global_strings {
350350
STRUCT_FOR_ID(write)
351351
STRUCT_FOR_ID(zipimporter)
352352
} identifiers;
353+
struct {
354+
PyASCIIObject _ascii;
355+
uint8_t _data[2];
356+
} ascii[128];
357+
struct {
358+
PyCompactUnicodeObject _latin1;
359+
uint8_t _data[2];
360+
} latin1[128];
353361
};
354362
/* End auto-generated code */
355363

Include/internal/pycore_runtime_init.h

Lines changed: 281 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -93,26 +93,34 @@ extern "C" {
9393
_PyBytes_SIMPLE_INIT(CH, 1) \
9494
}
9595

96-
#define _PyASCIIObject_INIT(LITERAL) \
96+
#define _PyUnicode_ASCII_BASE_INIT(LITERAL, ASCII) \
9797
{ \
98-
._ascii = { \
99-
.ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \
100-
.length = sizeof(LITERAL) - 1, \
101-
.hash = -1, \
102-
.state = { \
103-
.kind = 1, \
104-
.compact = 1, \
105-
.ascii = 1, \
106-
.ready = 1, \
107-
}, \
98+
.ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \
99+
.length = sizeof(LITERAL) - 1, \
100+
.hash = -1, \
101+
.state = { \
102+
.kind = 1, \
103+
.compact = 1, \
104+
.ascii = ASCII, \
105+
.ready = 1, \
108106
}, \
109-
._data = LITERAL, \
107+
}
108+
#define _PyASCIIObject_INIT(LITERAL) \
109+
{ \
110+
._ascii = _PyUnicode_ASCII_BASE_INIT(LITERAL, 1), \
111+
._data = LITERAL \
110112
}
111113
#define INIT_STR(NAME, LITERAL) \
112114
._ ## NAME = _PyASCIIObject_INIT(LITERAL)
113115
#define INIT_ID(NAME) \
114116
._ ## NAME = _PyASCIIObject_INIT(#NAME)
115-
117+
#define _PyUnicode_LATIN1_INIT(LITERAL) \
118+
{ \
119+
._latin1 = { \
120+
._base = _PyUnicode_ASCII_BASE_INIT(LITERAL, 0), \
121+
}, \
122+
._data = LITERAL, \
123+
}
116124

117125
/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */
118126
#define _Py_global_objects_INIT { \
@@ -965,6 +973,266 @@ extern "C" {
965973
INIT_ID(write), \
966974
INIT_ID(zipimporter), \
967975
}, \
976+
.ascii = { \
977+
_PyASCIIObject_INIT("\x00"), \
978+
_PyASCIIObject_INIT("\x01"), \
979+
_PyASCIIObject_INIT("\x02"), \
980+
_PyASCIIObject_INIT("\x03"), \
981+
_PyASCIIObject_INIT("\x04"), \
982+
_PyASCIIObject_INIT("\x05"), \
983+
_PyASCIIObject_INIT("\x06"), \
984+
_PyASCIIObject_INIT("\x07"), \
985+
_PyASCIIObject_INIT("\x08"), \
986+
_PyASCIIObject_INIT("\x09"), \
987+
_PyASCIIObject_INIT("\x0a"), \
988+
_PyASCIIObject_INIT("\x0b"), \
989+
_PyASCIIObject_INIT("\x0c"), \
990+
_PyASCIIObject_INIT("\x0d"), \
991+
_PyASCIIObject_INIT("\x0e"), \
992+
_PyASCIIObject_INIT("\x0f"), \
993+
_PyASCIIObject_INIT("\x10"), \
994+
_PyASCIIObject_INIT("\x11"), \
995+
_PyASCIIObject_INIT("\x12"), \
996+
_PyASCIIObject_INIT("\x13"), \
997+
_PyASCIIObject_INIT("\x14"), \
998+
_PyASCIIObject_INIT("\x15"), \
999+
_PyASCIIObject_INIT("\x16"), \
1000+
_PyASCIIObject_INIT("\x17"), \
1001+
_PyASCIIObject_INIT("\x18"), \
1002+
_PyASCIIObject_INIT("\x19"), \
1003+
_PyASCIIObject_INIT("\x1a"), \
1004+
_PyASCIIObject_INIT("\x1b"), \
1005+
_PyASCIIObject_INIT("\x1c"), \
1006+
_PyASCIIObject_INIT("\x1d"), \
1007+
_PyASCIIObject_INIT("\x1e"), \
1008+
_PyASCIIObject_INIT("\x1f"), \
1009+
_PyASCIIObject_INIT("\x20"), \
1010+
_PyASCIIObject_INIT("\x21"), \
1011+
_PyASCIIObject_INIT("\x22"), \
1012+
_PyASCIIObject_INIT("\x23"), \
1013+
_PyASCIIObject_INIT("\x24"), \
1014+
_PyASCIIObject_INIT("\x25"), \
1015+
_PyASCIIObject_INIT("\x26"), \
1016+
_PyASCIIObject_INIT("\x27"), \
1017+
_PyASCIIObject_INIT("\x28"), \
1018+
_PyASCIIObject_INIT("\x29"), \
1019+
_PyASCIIObject_INIT("\x2a"), \
1020+
_PyASCIIObject_INIT("\x2b"), \
1021+
_PyASCIIObject_INIT("\x2c"), \
1022+
_PyASCIIObject_INIT("\x2d"), \
1023+
_PyASCIIObject_INIT("\x2e"), \
1024+
_PyASCIIObject_INIT("\x2f"), \
1025+
_PyASCIIObject_INIT("\x30"), \
1026+
_PyASCIIObject_INIT("\x31"), \
1027+
_PyASCIIObject_INIT("\x32"), \
1028+
_PyASCIIObject_INIT("\x33"), \
1029+
_PyASCIIObject_INIT("\x34"), \
1030+
_PyASCIIObject_INIT("\x35"), \
1031+
_PyASCIIObject_INIT("\x36"), \
1032+
_PyASCIIObject_INIT("\x37"), \
1033+
_PyASCIIObject_INIT("\x38"), \
1034+
_PyASCIIObject_INIT("\x39"), \
1035+
_PyASCIIObject_INIT("\x3a"), \
1036+
_PyASCIIObject_INIT("\x3b"), \
1037+
_PyASCIIObject_INIT("\x3c"), \
1038+
_PyASCIIObject_INIT("\x3d"), \
1039+
_PyASCIIObject_INIT("\x3e"), \
1040+
_PyASCIIObject_INIT("\x3f"), \
1041+
_PyASCIIObject_INIT("\x40"), \
1042+
_PyASCIIObject_INIT("\x41"), \
1043+
_PyASCIIObject_INIT("\x42"), \
1044+
_PyASCIIObject_INIT("\x43"), \
1045+
_PyASCIIObject_INIT("\x44"), \
1046+
_PyASCIIObject_INIT("\x45"), \
1047+
_PyASCIIObject_INIT("\x46"), \
1048+
_PyASCIIObject_INIT("\x47"), \
1049+
_PyASCIIObject_INIT("\x48"), \
1050+
_PyASCIIObject_INIT("\x49"), \
1051+
_PyASCIIObject_INIT("\x4a"), \
1052+
_PyASCIIObject_INIT("\x4b"), \
1053+
_PyASCIIObject_INIT("\x4c"), \
1054+
_PyASCIIObject_INIT("\x4d"), \
1055+
_PyASCIIObject_INIT("\x4e"), \
1056+
_PyASCIIObject_INIT("\x4f"), \
1057+
_PyASCIIObject_INIT("\x50"), \
1058+
_PyASCIIObject_INIT("\x51"), \
1059+
_PyASCIIObject_INIT("\x52"), \
1060+
_PyASCIIObject_INIT("\x53"), \
1061+
_PyASCIIObject_INIT("\x54"), \
1062+
_PyASCIIObject_INIT("\x55"), \
1063+
_PyASCIIObject_INIT("\x56"), \
1064+
_PyASCIIObject_INIT("\x57"), \
1065+
_PyASCIIObject_INIT("\x58"), \
1066+
_PyASCIIObject_INIT("\x59"), \
1067+
_PyASCIIObject_INIT("\x5a"), \
1068+
_PyASCIIObject_INIT("\x5b"), \
1069+
_PyASCIIObject_INIT("\x5c"), \
1070+
_PyASCIIObject_INIT("\x5d"), \
1071+
_PyASCIIObject_INIT("\x5e"), \
1072+
_PyASCIIObject_INIT("\x5f"), \
1073+
_PyASCIIObject_INIT("\x60"), \
1074+
_PyASCIIObject_INIT("\x61"), \
1075+
_PyASCIIObject_INIT("\x62"), \
1076+
_PyASCIIObject_INIT("\x63"), \
1077+
_PyASCIIObject_INIT("\x64"), \
1078+
_PyASCIIObject_INIT("\x65"), \
1079+
_PyASCIIObject_INIT("\x66"), \
1080+
_PyASCIIObject_INIT("\x67"), \
1081+
_PyASCIIObject_INIT("\x68"), \
1082+
_PyASCIIObject_INIT("\x69"), \
1083+
_PyASCIIObject_INIT("\x6a"), \
1084+
_PyASCIIObject_INIT("\x6b"), \
1085+
_PyASCIIObject_INIT("\x6c"), \
1086+
_PyASCIIObject_INIT("\x6d"), \
1087+
_PyASCIIObject_INIT("\x6e"), \
1088+
_PyASCIIObject_INIT("\x6f"), \
1089+
_PyASCIIObject_INIT("\x70"), \
1090+
_PyASCIIObject_INIT("\x71"), \
1091+
_PyASCIIObject_INIT("\x72"), \
1092+
_PyASCIIObject_INIT("\x73"), \
1093+
_PyASCIIObject_INIT("\x74"), \
1094+
_PyASCIIObject_INIT("\x75"), \
1095+
_PyASCIIObject_INIT("\x76"), \
1096+
_PyASCIIObject_INIT("\x77"), \
1097+
_PyASCIIObject_INIT("\x78"), \
1098+
_PyASCIIObject_INIT("\x79"), \
1099+
_PyASCIIObject_INIT("\x7a"), \
1100+
_PyASCIIObject_INIT("\x7b"), \
1101+
_PyASCIIObject_INIT("\x7c"), \
1102+
_PyASCIIObject_INIT("\x7d"), \
1103+
_PyASCIIObject_INIT("\x7e"), \
1104+
_PyASCIIObject_INIT("\x7f"), \
1105+
}, \
1106+
.latin1 = { \
1107+
_PyUnicode_LATIN1_INIT("\x80"), \
1108+
_PyUnicode_LATIN1_INIT("\x81"), \
1109+
_PyUnicode_LATIN1_INIT("\x82"), \
1110+
_PyUnicode_LATIN1_INIT("\x83"), \
1111+
_PyUnicode_LATIN1_INIT("\x84"), \
1112+
_PyUnicode_LATIN1_INIT("\x85"), \
1113+
_PyUnicode_LATIN1_INIT("\x86"), \
1114+
_PyUnicode_LATIN1_INIT("\x87"), \
1115+
_PyUnicode_LATIN1_INIT("\x88"), \
1116+
_PyUnicode_LATIN1_INIT("\x89"), \
1117+
_PyUnicode_LATIN1_INIT("\x8a"), \
1118+
_PyUnicode_LATIN1_INIT("\x8b"), \
1119+
_PyUnicode_LATIN1_INIT("\x8c"), \
1120+
_PyUnicode_LATIN1_INIT("\x8d"), \
1121+
_PyUnicode_LATIN1_INIT("\x8e"), \
1122+
_PyUnicode_LATIN1_INIT("\x8f"), \
1123+
_PyUnicode_LATIN1_INIT("\x90"), \
1124+
_PyUnicode_LATIN1_INIT("\x91"), \
1125+
_PyUnicode_LATIN1_INIT("\x92"), \
1126+
_PyUnicode_LATIN1_INIT("\x93"), \
1127+
_PyUnicode_LATIN1_INIT("\x94"), \
1128+
_PyUnicode_LATIN1_INIT("\x95"), \
1129+
_PyUnicode_LATIN1_INIT("\x96"), \
1130+
_PyUnicode_LATIN1_INIT("\x97"), \
1131+
_PyUnicode_LATIN1_INIT("\x98"), \
1132+
_PyUnicode_LATIN1_INIT("\x99"), \
1133+
_PyUnicode_LATIN1_INIT("\x9a"), \
1134+
_PyUnicode_LATIN1_INIT("\x9b"), \
1135+
_PyUnicode_LATIN1_INIT("\x9c"), \
1136+
_PyUnicode_LATIN1_INIT("\x9d"), \
1137+
_PyUnicode_LATIN1_INIT("\x9e"), \
1138+
_PyUnicode_LATIN1_INIT("\x9f"), \
1139+
_PyUnicode_LATIN1_INIT("\xa0"), \
1140+
_PyUnicode_LATIN1_INIT("\xa1"), \
1141+
_PyUnicode_LATIN1_INIT("\xa2"), \
1142+
_PyUnicode_LATIN1_INIT("\xa3"), \
1143+
_PyUnicode_LATIN1_INIT("\xa4"), \
1144+
_PyUnicode_LATIN1_INIT("\xa5"), \
1145+
_PyUnicode_LATIN1_INIT("\xa6"), \
1146+
_PyUnicode_LATIN1_INIT("\xa7"), \
1147+
_PyUnicode_LATIN1_INIT("\xa8"), \
1148+
_PyUnicode_LATIN1_INIT("\xa9"), \
1149+
_PyUnicode_LATIN1_INIT("\xaa"), \
1150+
_PyUnicode_LATIN1_INIT("\xab"), \
1151+
_PyUnicode_LATIN1_INIT("\xac"), \
1152+
_PyUnicode_LATIN1_INIT("\xad"), \
1153+
_PyUnicode_LATIN1_INIT("\xae"), \
1154+
_PyUnicode_LATIN1_INIT("\xaf"), \
1155+
_PyUnicode_LATIN1_INIT("\xb0"), \
1156+
_PyUnicode_LATIN1_INIT("\xb1"), \
1157+
_PyUnicode_LATIN1_INIT("\xb2"), \
1158+
_PyUnicode_LATIN1_INIT("\xb3"), \
1159+
_PyUnicode_LATIN1_INIT("\xb4"), \
1160+
_PyUnicode_LATIN1_INIT("\xb5"), \
1161+
_PyUnicode_LATIN1_INIT("\xb6"), \
1162+
_PyUnicode_LATIN1_INIT("\xb7"), \
1163+
_PyUnicode_LATIN1_INIT("\xb8"), \
1164+
_PyUnicode_LATIN1_INIT("\xb9"), \
1165+
_PyUnicode_LATIN1_INIT("\xba"), \
1166+
_PyUnicode_LATIN1_INIT("\xbb"), \
1167+
_PyUnicode_LATIN1_INIT("\xbc"), \
1168+
_PyUnicode_LATIN1_INIT("\xbd"), \
1169+
_PyUnicode_LATIN1_INIT("\xbe"), \
1170+
_PyUnicode_LATIN1_INIT("\xbf"), \
1171+
_PyUnicode_LATIN1_INIT("\xc0"), \
1172+
_PyUnicode_LATIN1_INIT("\xc1"), \
1173+
_PyUnicode_LATIN1_INIT("\xc2"), \
1174+
_PyUnicode_LATIN1_INIT("\xc3"), \
1175+
_PyUnicode_LATIN1_INIT("\xc4"), \
1176+
_PyUnicode_LATIN1_INIT("\xc5"), \
1177+
_PyUnicode_LATIN1_INIT("\xc6"), \
1178+
_PyUnicode_LATIN1_INIT("\xc7"), \
1179+
_PyUnicode_LATIN1_INIT("\xc8"), \
1180+
_PyUnicode_LATIN1_INIT("\xc9"), \
1181+
_PyUnicode_LATIN1_INIT("\xca"), \
1182+
_PyUnicode_LATIN1_INIT("\xcb"), \
1183+
_PyUnicode_LATIN1_INIT("\xcc"), \
1184+
_PyUnicode_LATIN1_INIT("\xcd"), \
1185+
_PyUnicode_LATIN1_INIT("\xce"), \
1186+
_PyUnicode_LATIN1_INIT("\xcf"), \
1187+
_PyUnicode_LATIN1_INIT("\xd0"), \
1188+
_PyUnicode_LATIN1_INIT("\xd1"), \
1189+
_PyUnicode_LATIN1_INIT("\xd2"), \
1190+
_PyUnicode_LATIN1_INIT("\xd3"), \
1191+
_PyUnicode_LATIN1_INIT("\xd4"), \
1192+
_PyUnicode_LATIN1_INIT("\xd5"), \
1193+
_PyUnicode_LATIN1_INIT("\xd6"), \
1194+
_PyUnicode_LATIN1_INIT("\xd7"), \
1195+
_PyUnicode_LATIN1_INIT("\xd8"), \
1196+
_PyUnicode_LATIN1_INIT("\xd9"), \
1197+
_PyUnicode_LATIN1_INIT("\xda"), \
1198+
_PyUnicode_LATIN1_INIT("\xdb"), \
1199+
_PyUnicode_LATIN1_INIT("\xdc"), \
1200+
_PyUnicode_LATIN1_INIT("\xdd"), \
1201+
_PyUnicode_LATIN1_INIT("\xde"), \
1202+
_PyUnicode_LATIN1_INIT("\xdf"), \
1203+
_PyUnicode_LATIN1_INIT("\xe0"), \
1204+
_PyUnicode_LATIN1_INIT("\xe1"), \
1205+
_PyUnicode_LATIN1_INIT("\xe2"), \
1206+
_PyUnicode_LATIN1_INIT("\xe3"), \
1207+
_PyUnicode_LATIN1_INIT("\xe4"), \
1208+
_PyUnicode_LATIN1_INIT("\xe5"), \
1209+
_PyUnicode_LATIN1_INIT("\xe6"), \
1210+
_PyUnicode_LATIN1_INIT("\xe7"), \
1211+
_PyUnicode_LATIN1_INIT("\xe8"), \
1212+
_PyUnicode_LATIN1_INIT("\xe9"), \
1213+
_PyUnicode_LATIN1_INIT("\xea"), \
1214+
_PyUnicode_LATIN1_INIT("\xeb"), \
1215+
_PyUnicode_LATIN1_INIT("\xec"), \
1216+
_PyUnicode_LATIN1_INIT("\xed"), \
1217+
_PyUnicode_LATIN1_INIT("\xee"), \
1218+
_PyUnicode_LATIN1_INIT("\xef"), \
1219+
_PyUnicode_LATIN1_INIT("\xf0"), \
1220+
_PyUnicode_LATIN1_INIT("\xf1"), \
1221+
_PyUnicode_LATIN1_INIT("\xf2"), \
1222+
_PyUnicode_LATIN1_INIT("\xf3"), \
1223+
_PyUnicode_LATIN1_INIT("\xf4"), \
1224+
_PyUnicode_LATIN1_INIT("\xf5"), \
1225+
_PyUnicode_LATIN1_INIT("\xf6"), \
1226+
_PyUnicode_LATIN1_INIT("\xf7"), \
1227+
_PyUnicode_LATIN1_INIT("\xf8"), \
1228+
_PyUnicode_LATIN1_INIT("\xf9"), \
1229+
_PyUnicode_LATIN1_INIT("\xfa"), \
1230+
_PyUnicode_LATIN1_INIT("\xfb"), \
1231+
_PyUnicode_LATIN1_INIT("\xfc"), \
1232+
_PyUnicode_LATIN1_INIT("\xfd"), \
1233+
_PyUnicode_LATIN1_INIT("\xfe"), \
1234+
_PyUnicode_LATIN1_INIT("\xff"), \
1235+
}, \
9681236
}, \
9691237
\
9701238
.tuple_empty = { \

Include/internal/pycore_unicodeobject.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,6 @@ struct _Py_unicode_ids {
4444
};
4545

4646
struct _Py_unicode_state {
47-
/* Single character Unicode strings in the Latin-1 range are being
48-
shared as well. */
49-
PyObject *latin1[256];
5047
struct _Py_unicode_fs_codec fs_codec;
5148

5249
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
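(File path heading missing from this capture; presumably the Misc/NEWS.d entry added for bpo-46881.)

Lines changed: 1 addition & 0 deletions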
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Statically allocate and initialize the latin1 characters.

0 commit comments

Comments
 (0)