Skip to content

Commit bfbe5d2

Browse files
committed
Refactor utf-8 code, reduce impact on code size
1 parent 872804d commit bfbe5d2

File tree

1 file changed

+29
-59
lines changed

1 file changed

+29
-59
lines changed

lib/mp-readline/readline.c

Lines changed: 29 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,21 @@ typedef struct _readline_t {
9292
int escape_seq;
9393
int hist_cur;
9494
size_t cursor_pos;
95+
uint8_t utf8_cont_chars;
9596
char escape_seq_buf[1];
9697
const char *prompt;
9798
} readline_t;
9899

99100
STATIC readline_t rl;
100101

102+
// Tally the bytes in the half-open range [start, end) that UTF8_IS_CONT
// flags as UTF-8 continuation bytes.
//
//   start: first byte to examine
//   end:   one past the last byte to examine; nothing is read when
//          start >= end
//
// Returns the sum of UTF8_IS_CONT() over every byte in the range.
// NOTE(review): UTF8_IS_CONT is defined outside this block; this presumably
// evaluates to 0 or 1, making the result a plain count -- confirm.
int readline_count_cont_byte(char *start, char *end) {
    int total = 0;
    char *cur = start;
    while (cur < end) {
        // Keep the macro argument free of side effects in case the macro
        // evaluates it more than once.
        total += UTF8_IS_CONT(*cur);
        cur++;
    }
    return total;
}
109+
101110
int readline_process_char(int c) {
102111
size_t last_line_len = rl.line->len;
103112
int cont_chars = 0;
@@ -180,8 +189,7 @@ int readline_process_char(int c) {
180189
#endif
181190

182191
// Check if we have moved into a UTF-8 continuation byte
183-
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-nspace]) &&
184-
rl.cursor_pos-nspace > rl.orig_line_len) {
192+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-nspace])) {
185193
nspace++;
186194
cont_chars++;
187195
}
@@ -223,27 +231,23 @@ int readline_process_char(int c) {
223231
}else if (c >= 128) {
224232
// utf-8 character
225233
if (c >= 0xc0 && c < 0xf8) {
226-
// First Code Point
234+
// Lead code point
227235
vstr_ins_char(rl.line, rl.cursor_pos, c);
236+
rl.utf8_cont_chars = 0;
228237
}else if (UTF8_IS_CONT(c)) {
229-
char fcp = rl.line->buf[rl.cursor_pos];
230-
if (fcp >= 0xc0 && fcp < 0xf8) {
231-
int need = (0xe5 >> ((fcp >> 3) & 0x6)) & 3; // From unicode.c L195
232-
cont_chars++;
233-
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+cont_chars]) &&
234-
rl.cursor_pos+cont_chars < rl.line->len && cont_chars < need) {
235-
cont_chars++;
236-
}
237-
vstr_ins_char(rl.line, rl.cursor_pos+cont_chars, c);
238-
if (cont_chars == need) {
238+
char lcp = rl.line->buf[rl.cursor_pos];
239+
// Check for valid lead code point
240+
if (lcp >= 0xc0 && lcp < 0xf8) {
241+
rl.utf8_cont_chars += 1;
242+
vstr_ins_char(rl.line, rl.cursor_pos+rl.utf8_cont_chars, c);
243+
// set redraw parameters if we have the entire character
244+
uint8_t need = (0xe5 >> ((lcp >> 3) & 0x6)) & 3; // From unicode.c L195
245+
if (rl.utf8_cont_chars == need) {
239246
redraw_from_cursor = true;
240-
redraw_step_forward = cont_chars+1;
247+
redraw_step_forward = rl.utf8_cont_chars+1;
248+
cont_chars = rl.utf8_cont_chars;
241249
}
242-
}else{
243-
//ignore, for now (invalid first code point)
244250
}
245-
}else {
246-
// ignore, invalid
247251
}
248252
}
249253
} else if (rl.escape_seq == ESEQ_ESC) {
@@ -270,12 +274,8 @@ int readline_process_char(int c) {
270274
#endif
271275
// up arrow
272276
if (rl.hist_cur + 1 < (int)READLINE_HIST_SIZE && MP_STATE_PORT(readline_hist)[rl.hist_cur + 1] != NULL) {
273-
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
274-
// printf("char: %d\n", ch);
275-
if (UTF8_IS_CONT(*ch)) {
276-
cont_chars++;
277-
}
278-
}
277+
// Check for continuation characters through the cursor_pos
278+
cont_chars = readline_count_cont_byte(rl.line->buf+rl.orig_line_len, rl.line->buf+rl.cursor_pos);
279279
// increase hist num
280280
rl.hist_cur += 1;
281281
// set line to history
@@ -292,12 +292,8 @@ int readline_process_char(int c) {
292292
#endif
293293
// down arrow
294294
if (rl.hist_cur >= 0) {
295-
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
296-
// printf("char: %d\n", ch);
297-
if (UTF8_IS_CONT(*ch)) {
298-
cont_chars++;
299-
}
300-
}
295+
// Check for continuation characters through the cursor_pos
296+
cont_chars = readline_count_cont_byte(rl.line->buf+rl.orig_line_len, rl.line->buf+rl.cursor_pos);
301297
// decrease hist num
302298
rl.hist_cur -= 1;
303299
// set line to history
@@ -321,7 +317,6 @@ int readline_process_char(int c) {
321317
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+redraw_step_forward]) &&
322318
rl.cursor_pos+redraw_step_forward < rl.line->len) {
323319
redraw_step_forward++;
324-
cont_chars++;
325320
}
326321
}
327322
} else if (c == 'D') {
@@ -332,8 +327,7 @@ int readline_process_char(int c) {
332327
if (rl.cursor_pos > rl.orig_line_len) {
333328
redraw_step_back = 1;
334329
// Check if we have moved into a UTF-8 continuation byte
335-
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-redraw_step_back]) &&
336-
rl.cursor_pos-redraw_step_back > rl.orig_line_len) {
330+
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-redraw_step_back])) {
337331
redraw_step_back++;
338332
cont_chars++;
339333
}
@@ -352,21 +346,9 @@ int readline_process_char(int c) {
352346
if (c == '~') {
353347
if (rl.escape_seq_buf[0] == '1' || rl.escape_seq_buf[0] == '7') {
354348
home_key:
355-
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
356-
// printf("char: %d\n", ch);
357-
if (UTF8_IS_CONT(*ch)) {
358-
cont_chars++;
359-
}
360-
}
361349
redraw_step_back = rl.cursor_pos - rl.orig_line_len;
362350
} else if (rl.escape_seq_buf[0] == '4' || rl.escape_seq_buf[0] == '8') {
363351
end_key:
364-
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
365-
// printf("char: %d\n", ch);
366-
if (UTF8_IS_CONT(*ch)) {
367-
cont_chars++;
368-
}
369-
}
370352
redraw_step_forward = rl.line->len - rl.cursor_pos;
371353
} else if (rl.escape_seq_buf[0] == '3') {
372354
// delete
@@ -408,20 +390,8 @@ int readline_process_char(int c) {
408390
// erase old chars
409391
mp_hal_erase_line_from_cursor(last_line_len - rl.cursor_pos);
410392
}
411-
// Check if we have moved into a UTF-8 continuation byte
412-
// while (rl.cursor_pos+redraw_step_forward < rl.line->len &&
413-
// UTF8_IS_CONT(rl.line->buf[rl.cursor_pos]) && rl.cursor_pos > 0) {
414-
// rl.cursor_pos--;
415-
// redraw_step_forward++;
416-
// }
417-
418-
cont_chars = 0;
419-
for (char *ch = rl.line->buf+rl.cursor_pos+redraw_step_forward; ch < rl.line->buf+rl.line->len; ch++) {
420-
// printf("char: %d\n", ch);
421-
if (UTF8_IS_CONT(*ch)) {
422-
cont_chars++;
423-
}
424-
}
393+
// Check for continuation characters from the new cursor_pos to the EOL
394+
cont_chars = readline_count_cont_byte(rl.line->buf+rl.cursor_pos+redraw_step_forward, rl.line->buf+rl.line->len);
425395
// draw new chars
426396
mp_hal_stdout_tx_strn(rl.line->buf + rl.cursor_pos, rl.line->len - rl.cursor_pos);
427397
// move cursor forward if needed (already moved forward by length of line, so move it back)

0 commit comments

Comments
 (0)