Skip to content

Commit bccfb8b

Browse files
authored
Merge pull request #3243 from dunkmann00/support-utf8-repl-take-one
Better support for utf-8 repl
2 parents 4fd474d + 398be76 commit bccfb8b

File tree

1 file changed

+58
-9
lines changed

1 file changed

+58
-9
lines changed

lib/mp-readline/readline.c

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ STATIC char *str_dup_maybe(const char *str) {
5858
return s2;
5959
}
6060

61+
// Count the UTF-8 continuation bytes in the half-open byte range [start, end).
// A byte is counted when UTF8_IS_CONT() evaluates true for it.
//
// start: first byte to examine.
// end:   one past the last byte to examine.
//
// Returns the number of matching bytes; 0 when the range is empty
// (start >= end), because the loop body is never entered.
STATIC size_t count_cont_bytes(const char *start, const char *end) {
    // Accumulate in size_t so the counter matches the declared return type
    // and no signed-to-unsigned conversion happens on return.
    size_t count = 0;
    for (const char *pos = start; pos < end; pos++) {
        if (UTF8_IS_CONT(*pos)) {
            count++;
        }
    }
    return count;
}
70+
6171
// By default assume terminal which implements VT100 commands...
6272
#ifndef MICROPY_HAL_HAS_VT100
6373
#define MICROPY_HAL_HAS_VT100 (1)
@@ -92,14 +102,16 @@ typedef struct _readline_t {
92102
int escape_seq;
93103
int hist_cur;
94104
size_t cursor_pos;
105+
uint8_t utf8_cont_chars;
95106
char escape_seq_buf[1];
96107
const char *prompt;
97108
} readline_t;
98109

99110
STATIC readline_t rl;
100111

101112
int readline_process_char(int c) {
102-
size_t last_line_len = rl.line->len;
113+
size_t last_line_len = utf8_charlen((byte *)rl.line->buf, rl.line->len);
114+
int cont_chars = 0;
103115
int redraw_step_back = 0;
104116
bool redraw_from_cursor = false;
105117
int redraw_step_forward = 0;
@@ -178,6 +190,12 @@ int readline_process_char(int c) {
178190
int nspace = 1;
179191
#endif
180192

193+
// Check if we have moved into a UTF-8 continuation byte
194+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-nspace])) {
195+
nspace++;
196+
cont_chars++;
197+
}
198+
181199
// do the backspace
182200
vstr_cut_out_bytes(rl.line, rl.cursor_pos - nspace, nspace);
183201
// set redraw parameters
@@ -206,12 +224,27 @@ int readline_process_char(int c) {
206224
redraw_step_forward = compl_len;
207225
}
208226
#endif
209-
} else if (32 <= c ) {
227+
} else if (32 <= c) {
210228
// printable character
211-
vstr_ins_char(rl.line, rl.cursor_pos, c);
212-
// set redraw parameters
213-
redraw_from_cursor = true;
214-
redraw_step_forward = 1;
229+
char lcp = rl.line->buf[rl.cursor_pos];
230+
uint8_t cont_need = 0;
231+
if (!UTF8_IS_CONT(c)) {
232+
// ASCII or Lead code point
233+
rl.utf8_cont_chars = 0;
234+
lcp = c;
235+
}else {
236+
rl.utf8_cont_chars += 1;
237+
}
238+
if (lcp >= 0xc0 && lcp < 0xf8) {
239+
cont_need = (0xe5 >> ((lcp >> 3) & 0x6)) & 3; // From unicode.c L195
240+
}
241+
vstr_ins_char(rl.line, rl.cursor_pos+rl.utf8_cont_chars, c);
242+
// set redraw parameters if we have the entire character
243+
if (rl.utf8_cont_chars == cont_need) {
244+
redraw_from_cursor = true;
245+
redraw_step_forward = rl.utf8_cont_chars+1;
246+
cont_chars = rl.utf8_cont_chars;
247+
}
215248
}
216249
} else if (rl.escape_seq == ESEQ_ESC) {
217250
switch (c) {
@@ -237,6 +270,8 @@ int readline_process_char(int c) {
237270
#endif
238271
// up arrow
239272
if (rl.hist_cur + 1 < (int)READLINE_HIST_SIZE && MP_STATE_PORT(readline_hist)[rl.hist_cur + 1] != NULL) {
273+
// Check for continuation characters
274+
cont_chars = count_cont_bytes(rl.line->buf+rl.orig_line_len, rl.line->buf+rl.cursor_pos);
240275
// increase hist num
241276
rl.hist_cur += 1;
242277
// set line to history
@@ -253,6 +288,8 @@ int readline_process_char(int c) {
253288
#endif
254289
// down arrow
255290
if (rl.hist_cur >= 0) {
291+
// Check for continuation characters
292+
cont_chars = count_cont_bytes(rl.line->buf+rl.orig_line_len, rl.line->buf+rl.cursor_pos);
256293
// decrease hist num
257294
rl.hist_cur -= 1;
258295
// set line to history
@@ -272,6 +309,11 @@ int readline_process_char(int c) {
272309
// right arrow
273310
if (rl.cursor_pos < rl.line->len) {
274311
redraw_step_forward = 1;
312+
// Check if we have moved into a UTF-8 continuation byte
313+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+redraw_step_forward]) &&
314+
rl.cursor_pos+redraw_step_forward < rl.line->len) {
315+
redraw_step_forward++;
316+
}
275317
}
276318
} else if (c == 'D') {
277319
#if MICROPY_REPL_EMACS_KEYS
@@ -280,6 +322,11 @@ int readline_process_char(int c) {
280322
// left arrow
281323
if (rl.cursor_pos > rl.orig_line_len) {
282324
redraw_step_back = 1;
325+
// Check if we have moved into a UTF-8 continuation byte
326+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-redraw_step_back])) {
327+
redraw_step_back++;
328+
cont_chars++;
329+
}
283330
}
284331
} else if (c == 'H') {
285332
// home
@@ -331,18 +378,20 @@ int readline_process_char(int c) {
331378

332379
// redraw command prompt, efficiently
333380
if (redraw_step_back > 0) {
334-
mp_hal_move_cursor_back(redraw_step_back);
381+
mp_hal_move_cursor_back(redraw_step_back-cont_chars);
335382
rl.cursor_pos -= redraw_step_back;
336383
}
337384
if (redraw_from_cursor) {
338-
if (rl.line->len < last_line_len) {
385+
if (utf8_charlen((byte *)rl.line->buf, rl.line->len) < last_line_len) {
339386
// erase old chars
340387
mp_hal_erase_line_from_cursor(last_line_len - rl.cursor_pos);
341388
}
389+
// Check for continuation characters
390+
cont_chars = count_cont_bytes(rl.line->buf+rl.cursor_pos+redraw_step_forward, rl.line->buf+rl.line->len);
342391
// draw new chars
343392
mp_hal_stdout_tx_strn(rl.line->buf + rl.cursor_pos, rl.line->len - rl.cursor_pos);
344393
// move cursor forward if needed (already moved forward by length of line, so move it back)
345-
mp_hal_move_cursor_back(rl.line->len - (rl.cursor_pos + redraw_step_forward));
394+
mp_hal_move_cursor_back(rl.line->len - (rl.cursor_pos + redraw_step_forward) - cont_chars);
346395
rl.cursor_pos += redraw_step_forward;
347396
} else if (redraw_step_forward > 0) {
348397
// draw over old chars to move cursor forwards

0 commit comments

Comments
 (0)