Skip to content

Commit 1d410bb

Browse files
committed
Fix REPL support for Unicode
Currently, when a UTF-8 character that is encoded in more than one byte is typed in the REPL, it is not handled correctly: moving the cursor in any direction corrupts the displayed text. This commit fixes that.
1 parent fee3c0a commit 1d410bb

File tree

1 file changed

+86
-3
lines changed

1 file changed

+86
-3
lines changed

lib/mp-readline/readline.c

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ STATIC readline_t rl;
100100

101101
int readline_process_char(int c) {
102102
size_t last_line_len = rl.line->len;
103+
int cont_chars = 0;
103104
int redraw_step_back = 0;
104105
bool redraw_from_cursor = false;
105106
int redraw_step_forward = 0;
@@ -178,6 +179,13 @@ int readline_process_char(int c) {
178179
int nspace = 1;
179180
#endif
180181

182+
// Check if we have moved into a UTF-8 continuation byte
183+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-nspace]) &&
184+
rl.cursor_pos-nspace > rl.orig_line_len) {
185+
nspace++;
186+
cont_chars++;
187+
}
188+
181189
// do the backspace
182190
vstr_cut_out_bytes(rl.line, rl.cursor_pos - nspace, nspace);
183191
// set redraw parameters
@@ -206,12 +214,37 @@ int readline_process_char(int c) {
206214
redraw_step_forward = compl_len;
207215
}
208216
#endif
209-
} else if (32 <= c ) {
217+
} else if (32 <= c && c <= 126) {
210218
// printable character
211219
vstr_ins_char(rl.line, rl.cursor_pos, c);
212220
// set redraw parameters
213221
redraw_from_cursor = true;
214222
redraw_step_forward = 1;
223+
}else if (c >= 128) {
224+
// utf-8 character
225+
if (c >= 0xc0 && c < 0xf8) {
226+
// First Code Point
227+
vstr_ins_char(rl.line, rl.cursor_pos, c);
228+
}else if (UTF8_IS_CONT(c)) {
229+
char fcp = rl.line->buf[rl.cursor_pos];
230+
if (fcp >= 0xc0 && fcp < 0xf8) {
231+
int need = (0xe5 >> ((fcp >> 3) & 0x6)) & 3; // From unicode.c L195
232+
cont_chars++;
233+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+cont_chars]) &&
234+
rl.cursor_pos+cont_chars < rl.line->len && cont_chars < need) {
235+
cont_chars++;
236+
}
237+
vstr_ins_char(rl.line, rl.cursor_pos+cont_chars, c);
238+
if (cont_chars == need) {
239+
redraw_from_cursor = true;
240+
redraw_step_forward = cont_chars+1;
241+
}
242+
}else{
243+
//ignore, for now (invalid first code point)
244+
}
245+
}else {
246+
// ignore, invalid
247+
}
215248
}
216249
} else if (rl.escape_seq == ESEQ_ESC) {
217250
switch (c) {
@@ -237,6 +270,12 @@ int readline_process_char(int c) {
237270
#endif
238271
// up arrow
239272
if (rl.hist_cur + 1 < (int)READLINE_HIST_SIZE && MP_STATE_PORT(readline_hist)[rl.hist_cur + 1] != NULL) {
273+
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
274+
// printf("char: %d\n", ch);
275+
if (UTF8_IS_CONT(*ch)) {
276+
cont_chars++;
277+
}
278+
}
240279
// increase hist num
241280
rl.hist_cur += 1;
242281
// set line to history
@@ -253,6 +292,12 @@ int readline_process_char(int c) {
253292
#endif
254293
// down arrow
255294
if (rl.hist_cur >= 0) {
295+
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
296+
// printf("char: %d\n", ch);
297+
if (UTF8_IS_CONT(*ch)) {
298+
cont_chars++;
299+
}
300+
}
256301
// decrease hist num
257302
rl.hist_cur -= 1;
258303
// set line to history
@@ -272,6 +317,12 @@ int readline_process_char(int c) {
272317
// right arrow
273318
if (rl.cursor_pos < rl.line->len) {
274319
redraw_step_forward = 1;
320+
// Check if we have moved into a UTF-8 continuation byte
321+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+redraw_step_forward]) &&
322+
rl.cursor_pos+redraw_step_forward < rl.line->len) {
323+
redraw_step_forward++;
324+
cont_chars++;
325+
}
275326
}
276327
} else if (c == 'D') {
277328
#if MICROPY_REPL_EMACS_KEYS
@@ -280,6 +331,12 @@ int readline_process_char(int c) {
280331
// left arrow
281332
if (rl.cursor_pos > rl.orig_line_len) {
282333
redraw_step_back = 1;
334+
// Check if we have moved into a UTF-8 continuation byte
335+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-redraw_step_back]) &&
336+
rl.cursor_pos-redraw_step_back > rl.orig_line_len) {
337+
redraw_step_back++;
338+
cont_chars++;
339+
}
283340
}
284341
} else if (c == 'H') {
285342
// home
@@ -295,9 +352,21 @@ int readline_process_char(int c) {
295352
if (c == '~') {
296353
if (rl.escape_seq_buf[0] == '1' || rl.escape_seq_buf[0] == '7') {
297354
home_key:
355+
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
356+
// printf("char: %d\n", ch);
357+
if (UTF8_IS_CONT(*ch)) {
358+
cont_chars++;
359+
}
360+
}
298361
redraw_step_back = rl.cursor_pos - rl.orig_line_len;
299362
} else if (rl.escape_seq_buf[0] == '4' || rl.escape_seq_buf[0] == '8') {
300363
end_key:
364+
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
365+
// printf("char: %d\n", ch);
366+
if (UTF8_IS_CONT(*ch)) {
367+
cont_chars++;
368+
}
369+
}
301370
redraw_step_forward = rl.line->len - rl.cursor_pos;
302371
} else if (rl.escape_seq_buf[0] == '3') {
303372
// delete
@@ -331,18 +400,32 @@ int readline_process_char(int c) {
331400

332401
// redraw command prompt, efficiently
333402
if (redraw_step_back > 0) {
334-
mp_hal_move_cursor_back(redraw_step_back);
403+
mp_hal_move_cursor_back(redraw_step_back-cont_chars);
335404
rl.cursor_pos -= redraw_step_back;
336405
}
337406
if (redraw_from_cursor) {
338407
if (rl.line->len < last_line_len) {
339408
// erase old chars
340409
mp_hal_erase_line_from_cursor(last_line_len - rl.cursor_pos);
341410
}
411+
// Check if we have moved into a UTF-8 continuation byte
412+
// while (rl.cursor_pos+redraw_step_forward < rl.line->len &&
413+
// UTF8_IS_CONT(rl.line->buf[rl.cursor_pos]) && rl.cursor_pos > 0) {
414+
// rl.cursor_pos--;
415+
// redraw_step_forward++;
416+
// }
417+
418+
cont_chars = 0;
419+
for (char *ch = rl.line->buf+rl.cursor_pos+redraw_step_forward; ch < rl.line->buf+rl.line->len; ch++) {
420+
// printf("char: %d\n", ch);
421+
if (UTF8_IS_CONT(*ch)) {
422+
cont_chars++;
423+
}
424+
}
342425
// draw new chars
343426
mp_hal_stdout_tx_strn(rl.line->buf + rl.cursor_pos, rl.line->len - rl.cursor_pos);
344427
// move cursor forward if needed (already moved forward by length of line, so move it back)
345-
mp_hal_move_cursor_back(rl.line->len - (rl.cursor_pos + redraw_step_forward));
428+
mp_hal_move_cursor_back(rl.line->len - (rl.cursor_pos + redraw_step_forward) - cont_chars);
346429
rl.cursor_pos += redraw_step_forward;
347430
} else if (redraw_step_forward > 0) {
348431
// draw over old chars to move cursor forwards

0 commit comments

Comments
 (0)