Skip to content

Commit f30d3c6

Browse files
Yuki Izumi
authored and committed
Arena allocator
This allocator allocates a 4 MiB arena into which all allocations are made, and then increasingly larger arenas as earlier ones are used up. Freeing individual allocations is a no-op; release all arena memory at once with cmark_arena_reset(). In order to support realloc, we store the size of each allocation in a size_t immediately before the returned pointer. The speedup is over 25% on large (benchmark-sized) inputs -- we pay a small increase in maximum RSS (~10%) for this.
1 parent cec3765 commit f30d3c6

File tree

15 files changed

+257
-36
lines changed

15 files changed

+257
-36
lines changed

man/man3/cmark.3

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,27 @@ typedef struct cmark_mem {
115115
Defines the memory allocation functions to be used by CMark when parsing
116116
and allocating a document tree
117117

118+
.PP
119+
\fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[])
120+
121+
.PP
122+
The default memory allocator; uses the system's calloc, realloc and
123+
free.
124+
125+
.PP
126+
\fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[])
127+
128+
.PP
129+
An arena allocator; uses system calloc to allocate large slabs of
130+
memory. Memory in these slabs is not reused at all.
131+
132+
.PP
133+
\fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[])
134+
135+
.PP
136+
Resets the arena allocator, quickly returning all used memory to the
137+
operating system.
138+
118139
.SS
119140
Creating and Destroying Nodes
120141

@@ -646,6 +667,13 @@ Rendering
646667
Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to
647668
free the returned buffer.
648669

670+
.PP
671+
\fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[])
672+
673+
.PP
674+
As for \f[I]cmark_render_xml\f[], but specifying the allocator to use
675+
for the resulting string.
676+
649677
.PP
650678
\fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[])
651679

@@ -654,27 +682,55 @@ Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to
654682
add an appropriate header and footer. It is the caller's responsibility
655683
to free the returned buffer.
656684

685+
.PP
686+
\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[])
687+
688+
.PP
689+
As for \f[I]cmark_render_html\f[], but specifying the allocator to use
690+
for the resulting string.
691+
657692
.PP
658693
\fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])
659694

660695
.PP
661696
Render a \f[I]node\f[] tree as a groff man page, without the header. It
662697
is the caller's responsibility to free the returned buffer.
663698

699+
.PP
700+
\fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])
701+
702+
.PP
703+
As for \f[I]cmark_render_man\f[], but specifying the allocator to use
704+
for the resulting string.
705+
664706
.PP
665707
\fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])
666708

667709
.PP
668710
Render a \f[I]node\f[] tree as a commonmark document. It is the caller's
669711
responsibility to free the returned buffer.
670712

713+
.PP
714+
\fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])
715+
716+
.PP
717+
As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to
718+
use for the resulting string.
719+
671720
.PP
672721
\fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])
673722

674723
.PP
675724
Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's
676725
responsibility to free the returned buffer.
677726

727+
.PP
728+
\fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])
729+
730+
.PP
731+
As for \f[I]cmark_render_latex\f[], but specifying the allocator to use
732+
for the resulting string.
733+
678734
.SS
679735
Options
680736

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ set(LIBRARY_SOURCES
4040
houdini_html_e.c
4141
houdini_html_u.c
4242
cmark_ctype.c
43+
arena.c
4344
${HEADERS}
4445
)
4546

@@ -64,7 +65,7 @@ set_target_properties(${PROGRAM} PROPERTIES
6465
COMPILE_FLAGS -DCMARK_STATIC_DEFINE)
6566

6667
# Check integrity of node structure when compiled as debug:
67-
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES")
68+
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG")
6869
set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}")
6970

7071
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")

src/arena.c

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include <stdlib.h>
2+
#include <string.h>
3+
#include <stdint.h>
4+
#include "cmark.h"
5+
6+
/*
 * Arena allocator state.
 *
 * Chunks form a singly linked list through `prev`.  `A` is the chunk that
 * ordinary allocations are carved from; it is NULL until the first
 * allocation and again after cmark_arena_reset().
 *
 * Every allocation is laid out as [size_t header][payload], where the header
 * holds the payload size requested by the caller.  arena_realloc() relies on
 * that header to know how many bytes to carry over.
 */
static struct arena_chunk {
  size_t sz, used;          /* slab capacity in bytes / bytes already handed out */
  void *ptr;                /* zero-filled slab obtained from calloc */
  struct arena_chunk *prev; /* older chunk in the list, or NULL */
} *A = NULL;

/*
 * Allocates a chunk descriptor plus a zero-filled slab of `sz` bytes and
 * links it in front of `prev`.  Aborts the process if either allocation
 * fails, so the result is never NULL and neither is its `ptr`.
 */
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
  struct arena_chunk *c = calloc(1, sizeof(*c));
  if (!c)
    abort();
  c->sz = sz;
  c->ptr = calloc(1, sz);
  if (!c->ptr)
    abort();
  c->prev = prev;
  return c;
}

/* Creates the initial 4 MiB chunk. */
static void init_arena(void) {
  A = alloc_arena_chunk(4 * 1048576, NULL);
}

/*
 * Frees every chunk and its slab, leaving the arena empty (A == NULL).
 * Every pointer previously returned by the arena becomes invalid.
 */
void cmark_arena_reset(void) {
  while (A) {
    struct arena_chunk *n = A->prev;
    free(A->ptr);
    free(A);
    A = n;
  }
}

/*
 * calloc-compatible entry point: returns `nmem * size` zeroed bytes, aligned
 * to sizeof(size_t).  Aborts if the byte count overflows or memory runs out.
 */
static void *arena_calloc(size_t nmem, size_t size) {
  if (!A)
    init_arena();

  /* Each allocation is rounded up to a multiple of sizeof(size_t) so that
   * the next header and the next payload stay aligned. */
  const size_t align = sizeof(size_t) - 1;

  /* Reject requests whose size, header and padding would wrap size_t. */
  if (size != 0 && nmem > (SIZE_MAX - sizeof(size_t) - align) / size)
    abort();

  const size_t payload = nmem * size;
  size_t sz = payload + sizeof(size_t);
  sz = (sz + align) & ~align;

  struct arena_chunk *chunk;
  if (sz > A->sz) {
    /* Larger than the current chunk size: give the request a dedicated
     * chunk linked behind A, so A's remaining free space stays usable. */
    A->prev = chunk = alloc_arena_chunk(sz, A->prev);
  } else if (sz > A->sz - A->used) {
    /* Current chunk is full: start a new one, 1.5x the size. */
    A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
  } else {
    chunk = A;
  }

  void *ptr = (uint8_t *)chunk->ptr + chunk->used;
  chunk->used += sz;
  /* The header is written on every path, including dedicated chunks. */
  *((size_t *)ptr) = payload;
  return (uint8_t *)ptr + sizeof(size_t);
}

/*
 * realloc-compatible entry point: always allocates a fresh block of `size`
 * bytes and copies over min(old size, size) bytes from `ptr`.  The old block
 * is not reclaimed.  A NULL `ptr` behaves like arena_calloc(1, size).
 */
static void *arena_realloc(void *ptr, size_t size) {
  void *new_ptr = arena_calloc(1, size);
  if (ptr) {
    size_t old_size = ((size_t *)ptr)[-1];
    /* Never copy more than the new block can hold (shrinking realloc). */
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }
  return new_ptr;
}

/* Individual frees are ignored; memory is released by cmark_arena_reset(). */
static void arena_free(void *ptr) {
  (void)ptr;
  /* no-op */
}
67+
68+
/* Allocator table for the arena, initialized positionally with arena_calloc,
 * arena_realloc and arena_free.  Handed out to callers by
 * cmark_get_arena_mem_allocator(). */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
69+
70+
cmark_mem *cmark_get_arena_mem_allocator() {
71+
return &CMARK_ARENA_MEM_ALLOCATOR;
72+
}

src/blocks.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
108108
}
109109

110110
cmark_parser *cmark_parser_new(int options) {
111-
extern cmark_mem DEFAULT_MEM_ALLOCATOR;
112-
return cmark_parser_new_with_mem(options, &DEFAULT_MEM_ALLOCATOR);
111+
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
112+
return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
113113
}
114114

115115
void cmark_parser_free(cmark_parser *parser) {

src/cmark.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ static void *xrealloc(void *ptr, size_t size) {
2828
return new_ptr;
2929
}
3030

31-
cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
31+
cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
32+
33+
cmark_mem *cmark_get_default_mem_allocator() {
34+
return &CMARK_DEFAULT_MEM_ALLOCATOR;
35+
}
3236

3337
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
3438
cmark_node *doc;

src/cmark.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,24 @@ typedef struct cmark_mem {
100100
void (*free)(void *);
101101
} cmark_mem;
102102

103+
/** The default memory allocator; uses the system's calloc,
104+
* realloc and free.
105+
*/
106+
CMARK_EXPORT
107+
cmark_mem *cmark_get_default_mem_allocator();
108+
109+
/** An arena allocator; uses system calloc to allocate large
110+
* slabs of memory. Memory in these slabs is not reused at all.
111+
*/
112+
CMARK_EXPORT
113+
cmark_mem *cmark_get_arena_mem_allocator();
114+
115+
/** Resets the arena allocator, quickly returning all used memory
116+
* to the operating system.
117+
*/
118+
CMARK_EXPORT
119+
void cmark_arena_reset(void);
120+
103121
/**
104122
* ## Creating and Destroying Nodes
105123
*/
@@ -507,31 +525,61 @@ cmark_node *cmark_parse_file(FILE *f, int options);
507525
CMARK_EXPORT
508526
char *cmark_render_xml(cmark_node *root, int options);
509527

528+
/** As for 'cmark_render_xml', but specifying the allocator to use for
529+
* the resulting string.
530+
*/
531+
CMARK_EXPORT
532+
char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem);
533+
510534
/** Render a 'node' tree as an HTML fragment. It is up to the user
511535
* to add an appropriate header and footer. It is the caller's
512536
* responsibility to free the returned buffer.
513537
*/
514538
CMARK_EXPORT
515539
char *cmark_render_html(cmark_node *root, int options);
516540

541+
/** As for 'cmark_render_html', but specifying the allocator to use for
542+
* the resulting string.
543+
*/
544+
CMARK_EXPORT
545+
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem);
546+
517547
/** Render a 'node' tree as a groff man page, without the header.
518548
* It is the caller's responsibility to free the returned buffer.
519549
*/
520550
CMARK_EXPORT
521551
char *cmark_render_man(cmark_node *root, int options, int width);
522552

553+
/** As for 'cmark_render_man', but specifying the allocator to use for
554+
* the resulting string.
555+
*/
556+
CMARK_EXPORT
557+
char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
558+
523559
/** Render a 'node' tree as a commonmark document.
524560
* It is the caller's responsibility to free the returned buffer.
525561
*/
526562
CMARK_EXPORT
527563
char *cmark_render_commonmark(cmark_node *root, int options, int width);
528564

565+
/** As for 'cmark_render_commonmark', but specifying the allocator to use for
566+
* the resulting string.
567+
*/
568+
CMARK_EXPORT
569+
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
570+
529571
/** Render a 'node' tree as a LaTeX document.
530572
* It is the caller's responsibility to free the returned buffer.
531573
*/
532574
CMARK_EXPORT
533575
char *cmark_render_latex(cmark_node *root, int options, int width);
534576

577+
/** As for 'cmark_render_latex', but specifying the allocator to use for
578+
* the resulting string.
579+
*/
580+
CMARK_EXPORT
581+
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
582+
535583
/**
536584
* ## Options
537585
*/

src/commonmark.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,10 +466,14 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
466466
}
467467

468468
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
469+
return cmark_render_commonmark_with_mem(root, options, width, cmark_node_mem(root));
470+
}
471+
472+
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
469473
if (options & CMARK_OPT_HARDBREAKS) {
470474
// disable breaking on width, since it has
471475
// a different meaning with OPT_HARDBREAKS
472476
width = 0;
473477
}
474-
return cmark_render(root, options, width, outc, S_render_node);
478+
return cmark_render(mem, root, options, width, outc, S_render_node);
475479
}

src/html.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
323323
}
324324

325325
char *cmark_render_html(cmark_node *root, int options) {
326+
return cmark_render_html_with_mem(root, options, cmark_node_mem(root));
327+
}
328+
329+
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem) {
326330
char *result;
327-
cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root));
331+
cmark_strbuf html = CMARK_BUF_INIT(mem);
328332
cmark_event_type ev_type;
329333
cmark_node *cur;
330334
struct render_state state = {&html, NULL};

src/latex.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,5 +449,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
449449
}
450450

451451
char *cmark_render_latex(cmark_node *root, int options, int width) {
452-
return cmark_render(root, options, width, outc, S_render_node);
452+
return cmark_render_latex_with_mem(root, options, width, cmark_node_mem(root));
453+
}
454+
455+
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
456+
return cmark_render(mem, root, options, width, outc, S_render_node);
453457
}

0 commit comments

Comments
 (0)