Skip to content

Commit 1fd09cb

Browse files
authored
Merge pull request #6987 from dhalbert/dotenv-fixes
fix some dotenv parsing
2 parents 7f72280 + 6dc03ae commit 1fd09cb

File tree

4 files changed

+132
-51
lines changed

4 files changed

+132
-51
lines changed

shared-bindings/dotenv/__init__.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,13 @@
4040
//| A subset of the CPython `dotenv library <https://saurabh-kumar.com/python-dotenv/>`_. It does
4141
//| not support variables or double quotes.
4242
//|
43-
//| The simplest way to define keys and values is to put them in single quotes. \ and ' are
44-
//| escaped by \ in single quotes. Newlines can occur in quotes for multiline values. Comments
45-
//| start with # and apply for the rest of the line.
43+
//| Keys and values may be put in single quotes.
44+
//| Inside single quotes, ``\`` and ``'`` must be escaped with a preceding ``\``. Newlines may appear inside quotes for multiline values.
45+
//| Comments start with ``#`` and continue to the end of the line.
46+
//| A ``#`` immediately following an ``=`` is part of the value, not the start of a comment,
47+
//| and a ``#`` embedded in a value with no whitespace before it is part of that value.
48+
//| This corresponds to how assignments and comments work in most Unix shells.
49+
//|
4650
//|
4751
//| File format example:
4852
//|
@@ -58,6 +62,9 @@
5862
//| multiline = 'hello
5963
//| world
6064
//| how are you?'
65+
//| # The #'s below will be included in the value. They do not start a comment.
66+
//| key6=#value
67+
//| key7=abc#def
6168
//|
6269
//| """
6370
//|

shared-module/dotenv/__init__.c

Lines changed: 67 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -35,72 +35,90 @@
3535
#include "py/objstr.h"
3636
#include "supervisor/filesystem.h"
3737

38-
STATIC uint8_t consume_spaces(FIL *active_file) {
39-
uint8_t character = ' ';
40-
UINT quantity_read = 1;
41-
while (unichar_isspace(character) && quantity_read > 0) {
42-
f_read(active_file, &character, 1, &quantity_read);
43-
}
38+
// Return 0 if there is no next character (EOF).
39+
STATIC uint8_t get_next_character(FIL *active_file) {
40+
uint8_t character = 0;
41+
UINT quantity_read;
42+
// If there's an error or quantity_read is 0, character will remain 0.
43+
f_read(active_file, &character, 1, &quantity_read);
44+
return character;
45+
}
46+
47+
// Discard whitespace, except for newlines, returning the next character after the whitespace.
48+
// Return 0 if there is no next character (EOF).
49+
STATIC uint8_t consume_whitespace(FIL *active_file) {
50+
uint8_t character;
51+
do {
52+
character = get_next_character(active_file);
53+
} while (character != '\n' && character != 0 && unichar_isspace(character));
4454
return character;
4555
}
4656

4757
// Starting at the start of a new line, determines if the key matches the given
48-
// key. File pointer is left after the = after the key.
58+
// key. File pointer is rewound to just before the character that ended the key (=, newline, # or EOF).
4959
STATIC bool key_matches(FIL *active_file, const char *key) {
50-
uint8_t character = ' ';
51-
UINT quantity_read = 1;
52-
character = consume_spaces(active_file);
60+
uint8_t character;
61+
character = consume_whitespace(active_file);
62+
if (character == 0) {
63+
return false;
64+
}
5365
bool quoted = false;
5466
if (character == '\'') {
67+
// Beginning of single-quoted string.
5568
quoted = true;
56-
f_read(active_file, &character, 1, &quantity_read);
69+
character = get_next_character(active_file);
5770
}
5871
size_t key_pos = 0;
5972
bool escaped = false;
6073
bool matches = true;
6174
size_t key_len = strlen(key);
62-
while (quantity_read > 0) {
75+
while (character != 0) {
6376
if (character == '\\' && !escaped && quoted) {
6477
escaped = true;
6578
} else if (!escaped && quoted && character == '\'') {
6679
quoted = false;
67-
// Move past the quoted before breaking so we can check the validity of data past it.
68-
f_read(active_file, &character, 1, &quantity_read);
80+
// End of quoted key. Skip over the ending quote.
81+
character = get_next_character(active_file);
6982
break;
70-
} else if (!quoted && (unichar_isspace(character) || character == '=' || character == '\n' || character == '#')) {
83+
} else if (!quoted && (unichar_isspace(character) || character == '=' || character == '\n' || character == '#' || character == 0)) {
84+
// End of unquoted key.
7185
break;
7286
} else {
73-
matches = matches && key[key_pos] == character;
74-
escaped = false;
75-
key_pos++;
87+
// Still on tentative key; see if it matches the next supplied key character,
88+
// but don't run off the end of the supplied key.
89+
if (key_pos < key_len) {
90+
matches = matches && key[key_pos] == character;
91+
escaped = false;
92+
key_pos++;
93+
} else {
94+
// Key on line is too long.
95+
matches = false;
96+
}
7697
}
77-
78-
f_read(active_file, &character, 1, &quantity_read);
98+
character = get_next_character(active_file);
7999
}
80-
if (unichar_isspace(character)) {
81-
character = consume_spaces(active_file);
82-
}
83-
if (character == '=' || character == '\n' || character == '#') {
84-
// Rewind one so the value can find it.
100+
if (character == '=' || character == '\n' || character == '#' || character == 0) {
101+
// Rewind one so the value, if any, can be found.
85102
f_lseek(active_file, f_tell(active_file) - 1);
86103
} else {
87104
// We're followed by something else that is invalid syntax.
88105
matches = false;
89106
}
107+
90108
return matches && key_pos == key_len;
91109
}
92110

93111
STATIC bool next_line(FIL *active_file) {
94-
uint8_t character = ' ';
95-
UINT quantity_read = 1;
112+
uint8_t character;
96113
bool quoted = false;
97114
bool escaped = false;
98115
// Track comments because they last until the end of the line.
99116
bool comment = false;
100-
FRESULT result = FR_OK;
101117
// Consume all characters while quoted or others up to \n.
102-
while (result == FR_OK && quantity_read > 0 && (quoted || character != '\n')) {
103-
if (character == '#' || comment) {
118+
do {
119+
character = get_next_character(active_file);
120+
121+
if ((!quoted || character == '#') || comment) {
104122
// Comments consume any escaping.
105123
comment = true;
106124
} else if (!escaped) {
@@ -112,33 +130,32 @@ STATIC bool next_line(FIL *active_file) {
112130
} else {
113131
escaped = false;
114132
}
115-
result = f_read(active_file, &character, 1, &quantity_read);
116-
}
117-
return result == FR_OK && quantity_read > 0;
133+
} while (character != 0 && (quoted || character != '\n'));
134+
135+
return character != 0;
118136
}
119137

120138
STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) {
121-
uint8_t character = ' ';
122-
UINT quantity_read = 1;
123-
// Consume spaces before =
124-
character = consume_spaces(active_file);
139+
uint8_t character;
140+
// Consume spaces before "=", and get first character of interest.
141+
character = consume_whitespace(active_file);
125142
if (character != '=') {
126143
if (character == '#' || character == '\n') {
127144
// Keys without an = after them are valid with the value None.
128-
return 0;
145+
return -1;
129146
}
130147
// All other characters are invalid.
131148
return -1;
132149
}
133-
character = ' ';
134150
// Consume space after =
135-
while (unichar_isspace(character) && quantity_read > 0) {
136-
f_read(active_file, &character, 1, &quantity_read);
151+
if (character != '#') {
152+
// a # immediately after = is part of the value!
153+
character = consume_whitespace(active_file);
137154
}
138155
bool quoted = false;
139156
if (character == '\'') {
140157
quoted = true;
141-
f_read(active_file, &character, 1, &quantity_read);
158+
character = get_next_character(active_file);
142159
}
143160
if (character == '"') {
144161
// We don't support double quoted values.
@@ -150,20 +167,21 @@ STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) {
150167
// Count trailing spaces so we can ignore them at the end of unquoted
151168
// values.
152169
size_t trailing_spaces = 0;
153-
while (quantity_read > 0) {
170+
bool first_char = true;
171+
while (character != 0) {
154172
// Consume the first \ if the value is quoted.
155173
if (quoted && character == '\\' && !escaped) {
156174
escaped = true;
157-
// Drop this slash by short circuiting the rest of the loop.
158-
f_read(active_file, &character, 1, &quantity_read);
175+
// Drop this backslash by short circuiting the rest of the loop.
176+
character = get_next_character(active_file);
159177
continue;
160178
}
161179
if (quoted && !escaped && character == '\'') {
162180
// trailing ' means the value is done.
163181
break;
164182
}
165183
// Unquoted values are ended by a newline or comment.
166-
if (!quoted && (character == '\n' || character == '#')) {
184+
if (!quoted && (character == '\n' || (character == '#' && !first_char))) {
167185
if (character == '\n') {
168186
// Rewind one so the next_line can find the \n.
169187
f_lseek(active_file, f_tell(active_file) - 1);
@@ -182,7 +200,8 @@ STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) {
182200
value[value_pos] = character;
183201
}
184202
value_pos++;
185-
f_read(active_file, &character, 1, &quantity_read);
203+
character = get_next_character(active_file);
204+
first_char = false;
186205
}
187206

188207
return value_pos - trailing_spaces;
@@ -214,7 +233,7 @@ mp_obj_t common_hal_dotenv_get_key(const char *path, const char *key) {
214233
// the length.
215234
char value[64];
216235
mp_int_t actual_len = dotenv_get_key(path, key, value, sizeof(value));
217-
if (actual_len <= 0) {
236+
if (actual_len < 0) {
218237
return mp_const_none;
219238
}
220239
if ((size_t)actual_len >= sizeof(value)) {
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# No e0 value
2+
# comment preceded by spaces
3+
e1=e1value
4+
e2=e2value # value followed by a comment
5+
e3='e3value'
6+
e4='e4value' # quoted value followed by a comment
7+
# e5 should be None
8+
e5
9+
# e6 should be the empty string
10+
e6=
11+
# e7 should be '#' (bash-like syntax processing)
12+
e7=#
13+
# e8 should be the empty string
14+
e8=''
15+
# e9 should be the empty string
16+
e9= #
17+
e10=e10_first
18+
e10=e10_last
19+
e11='abc#def'
20+
# e12 should be 'abc#def'
21+
e12=abc#def
22+
e12='multi
23+
line'
24+
e13=e13value
25+
e14 #comment
26+
e15 = e15value
27+
# e16 should be '#'
28+
e16=# #
29+
# e17 should be 'def#hi'
30+
e17='def'#hi
31+
# e18 should be '#has a hash'
32+
e18=#has a hash
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import dotenv
2+
3+
FILE = "dotenv_test.env"
4+
5+
print("e0", dotenv.get_key(FILE, "e0"))
6+
print("e1", dotenv.get_key(FILE, "e1"))
7+
print("e2", dotenv.get_key(FILE, "e2"))
8+
print("e3", dotenv.get_key(FILE, "e3"))
9+
print("e4", dotenv.get_key(FILE, "e4"))
10+
print("e5", dotenv.get_key(FILE, "e5"))
11+
print("e6", dotenv.get_key(FILE, "e6"))
12+
print("e7", dotenv.get_key(FILE, "e7"))
13+
print("e8", dotenv.get_key(FILE, "e8"))
14+
print("e9", dotenv.get_key(FILE, "e9"))
15+
print("e10", dotenv.get_key(FILE, "e10"))
16+
print("e11", dotenv.get_key(FILE, "e11"))
17+
print("e12", dotenv.get_key(FILE, "e12"))
18+
print("e13", dotenv.get_key(FILE, "e13"))
19+
print("e14", dotenv.get_key(FILE, "e14"))
20+
print("e15", dotenv.get_key(FILE, "e15"))
21+
print("e16", dotenv.get_key(FILE, "e16"))
22+
print("e17", dotenv.get_key(FILE, "e17"))
23+
print("e18", dotenv.get_key(FILE, "e18"))

0 commit comments

Comments
 (0)