Skip to content

fix some dotenv parsing #6987

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions shared-bindings/dotenv/__init__.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,13 @@
//| A subset of the CPython `dotenv library <https://saurabh-kumar.com/python-dotenv/>`_. It does
//| not support variables or double quotes.
//|
//| The simplest way to define keys and values is to put them in single quotes. \ and ' are
//| escaped by \ in single quotes. Newlines can occur in quotes for multiline values. Comments
//| start with # and apply for the rest of the line.
//| Keys and values may be put in single quotes.
//| ``\`` and ``'`` are escaped by ``\`` in single quotes. Newlines can occur in quotes for multiline values.
//| Comments start with ``#`` and apply for the rest of the line.
//| A ``#`` immediately following an ``=`` is part of the value, not the start of a comment,
//| and a ``#`` embedded in a value without whitespace will be part of that value.
//| This corresponds to how assignments and comments work in most Unix shells.
//|
//|
//| File format example:
//|
Expand All @@ -58,6 +62,9 @@
//| multiline = 'hello
//| world
//| how are you?'
//| # The #'s below will be included in the value. They do not start a comment.
//| key6=#value
//| key7=abc#def
//|
//| """
//|
Expand Down
115 changes: 67 additions & 48 deletions shared-module/dotenv/__init__.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,72 +35,90 @@
#include "py/objstr.h"
#include "supervisor/filesystem.h"

STATIC uint8_t consume_spaces(FIL *active_file) {
uint8_t character = ' ';
UINT quantity_read = 1;
while (unichar_isspace(character) && quantity_read > 0) {
f_read(active_file, &character, 1, &quantity_read);
}
// Return 0 if there is no next character (EOF).
STATIC uint8_t get_next_character(FIL *active_file) {
uint8_t character = 0;
UINT quantity_read;
// If there's an error or quantity_read is 0, character will remain 0.
f_read(active_file, &character, 1, &quantity_read);
return character;
}

// Discard whitespace, except for newlines, returning the next character after the whitespace.
// Return 0 if there is no next character (EOF).
STATIC uint8_t consume_whitespace(FIL *active_file) {
uint8_t character;
do {
character = get_next_character(active_file);
} while (character != '\n' && character != 0 && unichar_isspace(character));
return character;
}

// Starting at the start of a new line, determines if the key matches the given
// key. File pointer is left after the = after the key.
// key. File pointer is set to be just before the = after the key.
STATIC bool key_matches(FIL *active_file, const char *key) {
uint8_t character = ' ';
UINT quantity_read = 1;
character = consume_spaces(active_file);
uint8_t character;
character = consume_whitespace(active_file);
if (character == 0) {
return false;
}
bool quoted = false;
if (character == '\'') {
// Beginning of single-quoted string.
quoted = true;
f_read(active_file, &character, 1, &quantity_read);
character = get_next_character(active_file);
}
size_t key_pos = 0;
bool escaped = false;
bool matches = true;
size_t key_len = strlen(key);
while (quantity_read > 0) {
while (character != 0) {
if (character == '\\' && !escaped && quoted) {
escaped = true;
} else if (!escaped && quoted && character == '\'') {
quoted = false;
// Move past the quoted before breaking so we can check the validity of data past it.
f_read(active_file, &character, 1, &quantity_read);
// End of quoted key. Skip over the ending quote.
character = get_next_character(active_file);
break;
} else if (!quoted && (unichar_isspace(character) || character == '=' || character == '\n' || character == '#')) {
} else if (!quoted && (unichar_isspace(character) || character == '=' || character == '\n' || character == '#' || character == 0)) {
// End of unquoted key.
break;
} else {
matches = matches && key[key_pos] == character;
escaped = false;
key_pos++;
// Still on tentative key; see if it matches the next supplied key character,
// but don't run off the end of the supplied key.
if (key_pos < key_len) {
matches = matches && key[key_pos] == character;
escaped = false;
key_pos++;
} else {
// Key on line is too long.
matches = false;
}
}

f_read(active_file, &character, 1, &quantity_read);
character = get_next_character(active_file);
}
if (unichar_isspace(character)) {
character = consume_spaces(active_file);
}
if (character == '=' || character == '\n' || character == '#') {
// Rewind one so the value can find it.
if (character == '=' || character == '\n' || character == '#' || character == 0) {
// Rewind one so the value, if any, can be found.
f_lseek(active_file, f_tell(active_file) - 1);
} else {
// We're followed by something else that is invalid syntax.
matches = false;
}

return matches && key_pos == key_len;
}

STATIC bool next_line(FIL *active_file) {
uint8_t character = ' ';
UINT quantity_read = 1;
uint8_t character;
bool quoted = false;
bool escaped = false;
// Track comments because they last until the end of the line.
bool comment = false;
FRESULT result = FR_OK;
// Consume all characters while quoted or others up to \n.
while (result == FR_OK && quantity_read > 0 && (quoted || character != '\n')) {
if (character == '#' || comment) {
do {
character = get_next_character(active_file);

if ((!quoted || character == '#') || comment) {
// Comments consume any escaping.
comment = true;
} else if (!escaped) {
Expand All @@ -112,33 +130,32 @@ STATIC bool next_line(FIL *active_file) {
} else {
escaped = false;
}
result = f_read(active_file, &character, 1, &quantity_read);
}
return result == FR_OK && quantity_read > 0;
} while (character != 0 && (quoted || character != '\n'));

return character != 0;
}

STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) {
uint8_t character = ' ';
UINT quantity_read = 1;
// Consume spaces before =
character = consume_spaces(active_file);
uint8_t character;
// Consume spaces before "=", and get first character of interest.
character = consume_whitespace(active_file);
if (character != '=') {
if (character == '#' || character == '\n') {
// Keys without an = after them are valid with the value None.
return 0;
return -1;
}
// All other characters are invalid.
return -1;
}
character = ' ';
// Consume space after =
while (unichar_isspace(character) && quantity_read > 0) {
f_read(active_file, &character, 1, &quantity_read);
if (character != '#') {
// a # immediately after = is part of the value!
character = consume_whitespace(active_file);
}
bool quoted = false;
if (character == '\'') {
quoted = true;
f_read(active_file, &character, 1, &quantity_read);
character = get_next_character(active_file);
}
if (character == '"') {
// We don't support double quoted values.
Expand All @@ -150,20 +167,21 @@ STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) {
// Count trailing spaces so we can ignore them at the end of unquoted
// values.
size_t trailing_spaces = 0;
while (quantity_read > 0) {
bool first_char = true;
while (character != 0) {
// Consume the first \ if the value is quoted.
if (quoted && character == '\\' && !escaped) {
escaped = true;
// Drop this slash by short circuiting the rest of the loop.
f_read(active_file, &character, 1, &quantity_read);
// Drop this backslash by short circuiting the rest of the loop.
character = get_next_character(active_file);
continue;
}
if (quoted && !escaped && character == '\'') {
// trailing ' means the value is done.
break;
}
// Unquoted values are ended by a newline or comment.
if (!quoted && (character == '\n' || character == '#')) {
if (!quoted && (character == '\n' || (character == '#' && !first_char))) {
if (character == '\n') {
// Rewind one so the next_line can find the \n.
f_lseek(active_file, f_tell(active_file) - 1);
Expand All @@ -182,7 +200,8 @@ STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) {
value[value_pos] = character;
}
value_pos++;
f_read(active_file, &character, 1, &quantity_read);
character = get_next_character(active_file);
first_char = false;
}

return value_pos - trailing_spaces;
Expand Down Expand Up @@ -214,7 +233,7 @@ mp_obj_t common_hal_dotenv_get_key(const char *path, const char *key) {
// the length.
char value[64];
mp_int_t actual_len = dotenv_get_key(path, key, value, sizeof(value));
if (actual_len <= 0) {
if (actual_len < 0) {
return mp_const_none;
}
if ((size_t)actual_len >= sizeof(value)) {
Expand Down
32 changes: 32 additions & 0 deletions tests/circuitpython-manual/dotenv_test.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# No e0 value
# comment preceded by spaces
e1=e1value
e2=e2value # value followed by a comment
e3='e3value'
e4='e4value' # quoted value followed by a comment
# e5 should be None
e5
# e6 should be the empty string
e6=
# e7 should be '#' (bash-like syntax processing)
e7=#
# e8 should be the empty string
e8=''
# e9 should be the empty string
e9= #
e10=e10_first
e10=e10_last
e11='abc#def'
# e12 should be 'abc#def'
e12=abc#def
e12='multi
line'
e13=e13value
e14 #comment
e15 = e15value
# e16 should be '#'
e16=# #
# e17 should be 'def#hi'
e17='def'#hi
# e18 should be '#has a hash'
e18=#has a hash
23 changes: 23 additions & 0 deletions tests/circuitpython-manual/dotenv_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import dotenv

FILE = "dotenv_test.env"

print("e0", dotenv.get_key(FILE, "e0"))
print("e1", dotenv.get_key(FILE, "e1"))
print("e2", dotenv.get_key(FILE, "e2"))
print("e3", dotenv.get_key(FILE, "e3"))
print("e4", dotenv.get_key(FILE, "e4"))
print("e5", dotenv.get_key(FILE, "e5"))
print("e6", dotenv.get_key(FILE, "e6"))
print("e7", dotenv.get_key(FILE, "e7"))
print("e8", dotenv.get_key(FILE, "e8"))
print("e9", dotenv.get_key(FILE, "e9"))
print("e10", dotenv.get_key(FILE, "e10"))
print("e11", dotenv.get_key(FILE, "e11"))
print("e12", dotenv.get_key(FILE, "e12"))
print("e13", dotenv.get_key(FILE, "e13"))
print("e14", dotenv.get_key(FILE, "e14"))
print("e15", dotenv.get_key(FILE, "e15"))
print("e16", dotenv.get_key(FILE, "e16"))
print("e17", dotenv.get_key(FILE, "e17"))
print("e18", dotenv.get_key(FILE, "e18"))