Skip to content

Commit 8dbcf8e

Browse files
tkuchta authored and browneee committed
[DFSAN] Add support for sscanf.
Reviewed By: browneee
Differential Revision: https://reviews.llvm.org/D153775
1 parent 400d326 commit 8dbcf8e

File tree

3 files changed

+478
-5
lines changed

3 files changed

+478
-5
lines changed

compiler-rt/lib/dfsan/dfsan_custom.cpp

Lines changed: 325 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2240,8 +2240,13 @@ typedef int dfsan_label_va;
22402240
// '%.3f').
22412241
struct Formatter {
22422242
Formatter(char *str_, const char *fmt_, size_t size_)
2243-
: str(str_), str_off(0), size(size_), fmt_start(fmt_), fmt_cur(fmt_),
2244-
width(-1) {}
2243+
: str(str_),
2244+
str_off(0),
2245+
size(size_),
2246+
fmt_start(fmt_),
2247+
fmt_cur(fmt_),
2248+
width(-1),
2249+
num_scanned(-1) {}
22452250

22462251
int format() {
22472252
char *tmp_fmt = build_format_string();
@@ -2266,12 +2271,50 @@ struct Formatter {
22662271
return retval;
22672272
}
22682273

2269-
char *build_format_string() {
2274+
int scan() {
2275+
char *tmp_fmt = build_format_string(true);
2276+
int read_count = 0;
2277+
int retval = sscanf(str + str_off, tmp_fmt, &read_count);
2278+
if (retval > 0) {
2279+
if (-1 == num_scanned)
2280+
num_scanned = 0;
2281+
num_scanned += retval;
2282+
}
2283+
free(tmp_fmt);
2284+
return read_count;
2285+
}
2286+
2287+
template <typename T>
2288+
int scan(T arg) {
2289+
char *tmp_fmt = build_format_string(true);
2290+
int read_count = 0;
2291+
int retval = sscanf(str + str_off, tmp_fmt, arg, &read_count);
2292+
if (retval > 0) {
2293+
if (-1 == num_scanned)
2294+
num_scanned = 0;
2295+
num_scanned += retval;
2296+
}
2297+
free(tmp_fmt);
2298+
return read_count;
2299+
}
2300+
2301+
// with_n -> toggles adding %n on/off; off by default
2302+
char *build_format_string(bool with_n = false) {
22702303
size_t fmt_size = fmt_cur - fmt_start + 1;
2271-
char *new_fmt = (char *)malloc(fmt_size + 1);
2304+
size_t add_size = 0;
2305+
if (with_n)
2306+
add_size = 2;
2307+
char *new_fmt = (char *)malloc(fmt_size + 1 + add_size);
22722308
assert(new_fmt);
22732309
internal_memcpy(new_fmt, fmt_start, fmt_size);
2274-
new_fmt[fmt_size] = '\0';
2310+
if (!with_n) {
2311+
new_fmt[fmt_size] = '\0';
2312+
} else {
2313+
new_fmt[fmt_size] = '%';
2314+
new_fmt[fmt_size + 1] = 'n';
2315+
new_fmt[fmt_size + 2] = '\0';
2316+
}
2317+
22752318
return new_fmt;
22762319
}
22772320

@@ -2303,6 +2346,7 @@ struct Formatter {
23032346
const char *fmt_start;
23042347
const char *fmt_cur;
23052348
int width;
2349+
int num_scanned;
23062350
};
23072351

23082352
// Formats the input and propagates the input labels to the output. The output
@@ -2495,13 +2539,237 @@ static int format_buffer(char *str, size_t size, const char *fmt,
24952539
return formatter.str_off;
24962540
}
24972541

2542+
// This function is an inverse of format_buffer: we take the input buffer,
2543+
// scan it in search for format strings and store the results in the varargs.
2544+
// The labels are propagated from the input buffer to the varargs.
2545+
static int scan_buffer(char *str, size_t size, const char *fmt,
2546+
dfsan_label *va_labels, dfsan_label *ret_label,
2547+
dfsan_origin *str_origin, dfsan_origin *ret_origin,
2548+
va_list ap) {
2549+
Formatter formatter(str, fmt, size);
2550+
while (*formatter.fmt_cur) {
2551+
formatter.fmt_start = formatter.fmt_cur;
2552+
formatter.width = -1;
2553+
int retval = 0;
2554+
dfsan_label l = 0;
2555+
void *dst_ptr = 0;
2556+
size_t write_size = 0;
2557+
if (*formatter.fmt_cur != '%') {
2558+
// Ordinary character. Consume all the characters until a '%' or the end
2559+
// of the string.
2560+
for (; *(formatter.fmt_cur + 1) && *(formatter.fmt_cur + 1) != '%';
2561+
++formatter.fmt_cur) {
2562+
}
2563+
retval = formatter.scan();
2564+
dfsan_set_label(0, formatter.str_cur(),
2565+
formatter.num_written_bytes(retval));
2566+
} else {
2567+
// Conversion directive. Consume all the characters until a conversion
2568+
// specifier or the end of the string.
2569+
bool end_fmt = false;
2570+
for (; *formatter.fmt_cur && !end_fmt;) {
2571+
switch (*++formatter.fmt_cur) {
2572+
case 'd':
2573+
case 'i':
2574+
case 'o':
2575+
case 'u':
2576+
case 'x':
2577+
case 'X':
2578+
switch (*(formatter.fmt_cur - 1)) {
2579+
case 'h':
2580+
// Also covers the 'hh' case (since the size of the arg is still
2581+
// an int).
2582+
dst_ptr = va_arg(ap, int *);
2583+
retval = formatter.scan((int *)dst_ptr);
2584+
write_size = sizeof(int);
2585+
break;
2586+
case 'l':
2587+
if (formatter.fmt_cur - formatter.fmt_start >= 2 &&
2588+
*(formatter.fmt_cur - 2) == 'l') {
2589+
dst_ptr = va_arg(ap, long long int *);
2590+
retval = formatter.scan((long long int *)dst_ptr);
2591+
write_size = sizeof(long long int);
2592+
} else {
2593+
dst_ptr = va_arg(ap, long int *);
2594+
retval = formatter.scan((long int *)dst_ptr);
2595+
write_size = sizeof(long int);
2596+
}
2597+
break;
2598+
case 'q':
2599+
dst_ptr = va_arg(ap, long long int *);
2600+
retval = formatter.scan((long long int *)dst_ptr);
2601+
write_size = sizeof(long long int);
2602+
break;
2603+
case 'j':
2604+
dst_ptr = va_arg(ap, intmax_t *);
2605+
retval = formatter.scan((intmax_t *)dst_ptr);
2606+
write_size = sizeof(intmax_t);
2607+
break;
2608+
case 'z':
2609+
case 't':
2610+
dst_ptr = va_arg(ap, size_t *);
2611+
retval = formatter.scan((size_t *)dst_ptr);
2612+
write_size = sizeof(size_t);
2613+
break;
2614+
default:
2615+
dst_ptr = va_arg(ap, int *);
2616+
retval = formatter.scan((int *)dst_ptr);
2617+
write_size = sizeof(int);
2618+
}
2619+
// get the label associated with the string at the corresponding
2620+
// place
2621+
l = dfsan_read_label(formatter.str_cur(),
2622+
formatter.num_written_bytes(retval));
2623+
if (str_origin == nullptr)
2624+
dfsan_set_label(l, dst_ptr, write_size);
2625+
else {
2626+
dfsan_set_label(l, dst_ptr, write_size);
2627+
size_t scan_count = formatter.num_written_bytes(retval);
2628+
size_t size = scan_count > write_size ? write_size : scan_count;
2629+
dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
2630+
}
2631+
end_fmt = true;
2632+
2633+
break;
2634+
2635+
case 'a':
2636+
case 'A':
2637+
case 'e':
2638+
case 'E':
2639+
case 'f':
2640+
case 'F':
2641+
case 'g':
2642+
case 'G':
2643+
if (*(formatter.fmt_cur - 1) == 'L') {
2644+
dst_ptr = va_arg(ap, long double *);
2645+
retval = formatter.scan((long double *)dst_ptr);
2646+
write_size = sizeof(long double);
2647+
} else if (*(formatter.fmt_cur - 1) == 'l') {
2648+
dst_ptr = va_arg(ap, double *);
2649+
retval = formatter.scan((double *)dst_ptr);
2650+
write_size = sizeof(double);
2651+
} else {
2652+
dst_ptr = va_arg(ap, float *);
2653+
retval = formatter.scan((float *)dst_ptr);
2654+
write_size = sizeof(float);
2655+
}
2656+
l = dfsan_read_label(formatter.str_cur(),
2657+
formatter.num_written_bytes(retval));
2658+
if (str_origin == nullptr)
2659+
dfsan_set_label(l, dst_ptr, write_size);
2660+
else {
2661+
dfsan_set_label(l, dst_ptr, write_size);
2662+
size_t scan_count = formatter.num_written_bytes(retval);
2663+
size_t size = scan_count > write_size ? write_size : scan_count;
2664+
dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
2665+
}
2666+
end_fmt = true;
2667+
break;
2668+
2669+
case 'c':
2670+
dst_ptr = va_arg(ap, char *);
2671+
retval = formatter.scan((char *)dst_ptr);
2672+
write_size = sizeof(char);
2673+
l = dfsan_read_label(formatter.str_cur(),
2674+
formatter.num_written_bytes(retval));
2675+
if (str_origin == nullptr)
2676+
dfsan_set_label(l, dst_ptr, write_size);
2677+
else {
2678+
dfsan_set_label(l, dst_ptr, write_size);
2679+
size_t scan_count = formatter.num_written_bytes(retval);
2680+
size_t size = scan_count > write_size ? write_size : scan_count;
2681+
dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
2682+
}
2683+
end_fmt = true;
2684+
break;
2685+
2686+
case 's': {
2687+
dst_ptr = va_arg(ap, char *);
2688+
retval = formatter.scan((char *)dst_ptr);
2689+
if (1 == retval) {
2690+
// special case: we have parsed a single string and we need to
2691+
// update retval with the string size
2692+
retval = strlen((char *)dst_ptr);
2693+
}
2694+
if (str_origin)
2695+
dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(),
2696+
formatter.num_written_bytes(retval));
2697+
va_labels++;
2698+
dfsan_mem_shadow_transfer(dst_ptr, formatter.str_cur(),
2699+
formatter.num_written_bytes(retval));
2700+
end_fmt = true;
2701+
break;
2702+
}
2703+
2704+
case 'p':
2705+
dst_ptr = va_arg(ap, void *);
2706+
retval =
2707+
formatter.scan((int *)dst_ptr); // note: changing void* to int*
2708+
// since we need to call sizeof
2709+
write_size = sizeof(int);
2710+
2711+
l = dfsan_read_label(formatter.str_cur(),
2712+
formatter.num_written_bytes(retval));
2713+
if (str_origin == nullptr)
2714+
dfsan_set_label(l, dst_ptr, write_size);
2715+
else {
2716+
dfsan_set_label(l, dst_ptr, write_size);
2717+
size_t scan_count = formatter.num_written_bytes(retval);
2718+
size_t size = scan_count > write_size ? write_size : scan_count;
2719+
dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
2720+
}
2721+
end_fmt = true;
2722+
break;
2723+
2724+
case 'n': {
2725+
int *ptr = va_arg(ap, int *);
2726+
*ptr = (int)formatter.str_off;
2727+
va_labels++;
2728+
dfsan_set_label(0, ptr, sizeof(*ptr));
2729+
end_fmt = true;
2730+
break;
2731+
}
2732+
2733+
case '%':
2734+
retval = formatter.scan();
2735+
end_fmt = true;
2736+
break;
2737+
2738+
case '*':
2739+
formatter.width = va_arg(ap, int);
2740+
va_labels++;
2741+
break;
2742+
2743+
default:
2744+
break;
2745+
}
2746+
}
2747+
}
2748+
2749+
if (retval < 0) {
2750+
return retval;
2751+
}
2752+
2753+
formatter.fmt_cur++;
2754+
formatter.str_off += retval;
2755+
}
2756+
2757+
*ret_label = 0;
2758+
if (ret_origin)
2759+
*ret_origin = 0;
2760+
2761+
// Number of items scanned in total.
2762+
return formatter.num_scanned;
2763+
}
2764+
24982765
extern "C" {
24992766
SANITIZER_INTERFACE_ATTRIBUTE
25002767
int __dfsw_sprintf(char *str, const char *format, dfsan_label str_label,
25012768
dfsan_label format_label, dfsan_label *va_labels,
25022769
dfsan_label *ret_label, ...) {
25032770
va_list ap;
25042771
va_start(ap, ret_label);
2772+
25052773
int ret = format_buffer(str, ~0ul, format, va_labels, ret_label, nullptr,
25062774
nullptr, ap);
25072775
va_end(ap);
@@ -2550,6 +2818,58 @@ int __dfso_snprintf(char *str, size_t size, const char *format,
25502818
return ret;
25512819
}
25522820

2821+
// Label-only custom wrapper for sscanf(). Forwards the variadic destination
// pointers to scan_buffer() with origin tracking disabled (both origin
// pointers null) and ~0ul as the buffer size. Returns scan_buffer()'s result.
SANITIZER_INTERFACE_ATTRIBUTE
int __dfsw_sscanf(char *str, const char *format, dfsan_label str_label,
                  dfsan_label format_label, dfsan_label *va_labels,
                  dfsan_label *ret_label, ...) {
  va_list args;
  va_start(args, ret_label);
  const int items = scan_buffer(str, ~0ul, format, va_labels, ret_label,
                                /*str_origin=*/nullptr,
                                /*ret_origin=*/nullptr, args);
  va_end(args);
  return items;
}
2832+
2833+
// Origin-tracking custom wrapper for sscanf(). Forwards the variadic
// destination pointers to scan_buffer(), passing the address of str_origin
// (which enables origin propagation there) and ret_origin, with ~0ul as the
// buffer size. Returns scan_buffer()'s result.
SANITIZER_INTERFACE_ATTRIBUTE
int __dfso_sscanf(char *str, const char *format, dfsan_label str_label,
                  dfsan_label format_label, dfsan_label *va_labels,
                  dfsan_label *ret_label, dfsan_origin str_origin,
                  dfsan_origin format_origin, dfsan_origin *va_origins,
                  dfsan_origin *ret_origin, ...) {
  va_list args;
  va_start(args, ret_origin);
  const int items = scan_buffer(str, ~0ul, format, va_labels, ret_label,
                                &str_origin, ret_origin, args);
  va_end(args);
  return items;
}
2846+
2847+
// Label-only custom wrapper for __isoc99_sscanf(). Forwards the variadic
// destination pointers to scan_buffer() with origin tracking disabled (both
// origin pointers null) and ~0ul as the buffer size. Returns scan_buffer()'s
// result.
SANITIZER_INTERFACE_ATTRIBUTE
int __dfsw___isoc99_sscanf(char *str, const char *format, dfsan_label str_label,
                           dfsan_label format_label, dfsan_label *va_labels,
                           dfsan_label *ret_label, ...) {
  va_list args;
  va_start(args, ret_label);
  const int items = scan_buffer(str, ~0ul, format, va_labels, ret_label,
                                /*str_origin=*/nullptr,
                                /*ret_origin=*/nullptr, args);
  va_end(args);
  return items;
}
2858+
2859+
// Origin-tracking custom wrapper for __isoc99_sscanf(). Forwards the variadic
// destination pointers to scan_buffer(), passing the address of str_origin
// (which enables origin propagation there) and ret_origin, with ~0ul as the
// buffer size. Returns scan_buffer()'s result.
SANITIZER_INTERFACE_ATTRIBUTE
int __dfso___isoc99_sscanf(char *str, const char *format, dfsan_label str_label,
                           dfsan_label format_label, dfsan_label *va_labels,
                           dfsan_label *ret_label, dfsan_origin str_origin,
                           dfsan_origin format_origin, dfsan_origin *va_origins,
                           dfsan_origin *ret_origin, ...) {
  va_list args;
  va_start(args, ret_origin);
  const int items = scan_buffer(str, ~0ul, format, va_labels, ret_label,
                                &str_origin, ret_origin, args);
  va_end(args);
  return items;
}
2872+
25532873
static void BeforeFork() {
25542874
StackDepotLockAll();
25552875
GetChainedOriginDepot()->LockAll();

compiler-rt/lib/dfsan/done_abilist.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,10 @@ fun:gettimeofday=custom
308308
fun:sprintf=custom
309309
fun:snprintf=custom
310310

311+
# scanf-like
312+
fun:sscanf=custom
313+
fun:__isoc99_sscanf=custom
314+
311315
# TODO: custom
312316
fun:asprintf=discard
313317
fun:qsort=discard

0 commit comments

Comments
 (0)