Skip to content

Commit 4f103d1

Browse files
author
Yuki Izumi
committed
Arena allocator
This allocator allocates a 4MiB arena into which all allocations are made, and then increasingly larger arenas as earlier ones are used up. Freeing memory in the arena is a no-op: clean all memory with cmark_arena_reset(). In order to support realloc, we store the size of each allocation in a size_t before the returned pointer. The speedup is over 25% on large (benchmark-sized) inputs -- we pay a small increase in maximum RSS (~10%) for this.
1 parent 0618a8a commit 4f103d1

File tree

15 files changed

+257
-36
lines changed

15 files changed

+257
-36
lines changed

man/man3/cmark.3

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,27 @@ typedef struct cmark_mem {
115115
Defines the memory allocation functions to be used by CMark when parsing
116116
and allocating a document tree
117117

118+
.PP
119+
\fIcmark_mem *\f[] \fBcmark_get_default_mem_allocator\f[](\fI\f[])
120+
121+
.PP
122+
The default memory allocator; uses the system's calloc, realloc and
123+
free.
124+
125+
.PP
126+
\fIcmark_mem *\f[] \fBcmark_get_arena_mem_allocator\f[](\fI\f[])
127+
128+
.PP
129+
An arena allocator; uses system calloc to allocate large slabs of
130+
memory. Memory in these slabs is not reused at all.
131+
132+
.PP
133+
\fIvoid\f[] \fBcmark_arena_reset\f[](\fIvoid\f[])
134+
135+
.PP
136+
Resets the arena allocator, quickly returning all used memory to the
137+
operating system.
138+
118139
.SS
119140
Creating and Destroying Nodes
120141

@@ -641,6 +662,13 @@ Rendering
641662
Render a \f[I]node\f[] tree as XML. It is the caller's responsibility to
642663
free the returned buffer.
643664

665+
.PP
666+
\fIchar *\f[] \fBcmark_render_xml_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[])
667+
668+
.PP
669+
As for \f[I]cmark_render_xml\f[], but specifying the allocator to use
670+
for the resulting string.
671+
644672
.PP
645673
\fIchar *\f[] \fBcmark_render_html\f[](\fIcmark_node *root\f[], \fIint options\f[])
646674

@@ -649,27 +677,55 @@ Render a \f[I]node\f[] tree as an HTML fragment. It is up to the user to
649677
add an appropriate header and footer. It is the caller's responsibility
650678
to free the returned buffer.
651679

680+
.PP
681+
\fIchar *\f[] \fBcmark_render_html_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIcmark_mem *mem\f[])
682+
683+
.PP
684+
As for \f[I]cmark_render_html\f[], but specifying the allocator to use
685+
for the resulting string.
686+
652687
.PP
653688
\fIchar *\f[] \fBcmark_render_man\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])
654689

655690
.PP
656691
Render a \f[I]node\f[] tree as a groff man page, without the header. It
657692
is the caller's responsibility to free the returned buffer.
658693

694+
.PP
695+
\fIchar *\f[] \fBcmark_render_man_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])
696+
697+
.PP
698+
As for \f[I]cmark_render_man\f[], but specifying the allocator to use
699+
for the resulting string.
700+
659701
.PP
660702
\fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])
661703

662704
.PP
663705
Render a \f[I]node\f[] tree as a commonmark document. It is the caller's
664706
responsibility to free the returned buffer.
665707

708+
.PP
709+
\fIchar *\f[] \fBcmark_render_commonmark_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])
710+
711+
.PP
712+
As for \f[I]cmark_render_commonmark\f[], but specifying the allocator to
713+
use for the resulting string.
714+
666715
.PP
667716
\fIchar *\f[] \fBcmark_render_latex\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[])
668717

669718
.PP
670719
Render a \f[I]node\f[] tree as a LaTeX document. It is the caller's
671720
responsibility to free the returned buffer.
672721

722+
.PP
723+
\fIchar *\f[] \fBcmark_render_latex_with_mem\f[](\fIcmark_node *root\f[], \fIint options\f[], \fIint width\f[], \fIcmark_mem *mem\f[])
724+
725+
.PP
726+
As for \f[I]cmark_render_latex\f[], but specifying the allocator to use
727+
for the resulting string.
728+
673729
.SS
674730
Options
675731

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ set(LIBRARY_SOURCES
4040
houdini_html_e.c
4141
houdini_html_u.c
4242
cmark_ctype.c
43+
arena.c
4344
${HEADERS}
4445
)
4546

@@ -78,7 +79,7 @@ set_target_properties(${PROGRAM} PROPERTIES
7879
COMPILE_FLAGS -DCMARK_STATIC_DEFINE)
7980

8081
# Check integrity of node structure when compiled as debug:
81-
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES")
82+
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG")
8283
set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}")
8384

8485
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")

src/arena.c

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include <stdlib.h>
2+
#include <string.h>
3+
#include <stdint.h>
4+
#include "cmark.h"
5+
6+
/* Size in bytes of the first slab; each later slab is half again as large
 * as the current one. */
#define ARENA_INITIAL_CHUNK_SIZE ((size_t)4 * 1048576)

/* Bookkeeping record stored immediately before every pointer handed out by
 * the arena. Only `size` carries data (the number of bytes the caller asked
 * for, needed by arena_realloc); the other members exist solely to give the
 * header the size/alignment of the widest common scalar types, so that the
 * payload following it is suitably aligned. */
union arena_header {
  size_t size;
  void *align_ptr;
  long long align_ll;
  double align_dbl;
};

/* One slab of arena memory. Slabs form a singly linked list through `prev`;
 * `A` is the head, and is the only slab that bump allocations are carved
 * from. `sz` is the slab capacity and `used` the number of bytes consumed. */
static struct arena_chunk {
  size_t sz, used;
  void *ptr;
  struct arena_chunk *prev;
} *A = NULL;

/* Allocates a zero-filled slab of `sz` bytes linked in front of `prev`.
 * Aborts if either the descriptor or the slab itself cannot be allocated. */
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
  struct arena_chunk *c = calloc(1, sizeof(*c));
  if (!c)
    abort();
  c->sz = sz;
  c->ptr = calloc(1, sz);
  if (!c->ptr)
    abort(); /* previously unchecked: a NULL slab would be dereferenced later */
  c->prev = prev;
  return c;
}

/* Creates the initial slab. */
static void init_arena(void) {
  A = alloc_arena_chunk(ARENA_INITIAL_CHUNK_SIZE, NULL);
}

/* Frees every slab and leaves the arena empty (A == NULL). All pointers
 * previously returned by the arena become invalid. */
void cmark_arena_reset(void) {
  while (A) {
    struct arena_chunk *prev = A->prev;
    free(A->ptr);
    free(A);
    A = prev;
  }
}

/* Returns `nmem * size` zero-filled bytes from the arena.
 *
 * The memory is zero because slabs come from calloc and are never reused.
 * Aborts if the requested size overflows size_t or the system allocator
 * fails; never returns NULL. */
static void *arena_calloc(size_t nmem, size_t size) {
  const size_t header = sizeof(union arena_header);

  /* Reject requests whose byte count, plus header and rounding slack,
   * would wrap around size_t. */
  if (size != 0 && nmem > (SIZE_MAX - 2 * header) / size)
    abort();

  if (!A)
    init_arena();

  const size_t payload = nmem * size;

  /* Round the footprint up to a multiple of the header size so that `used`
   * always stays aligned, and with it every later header and payload. */
  size_t sz = header + payload;
  sz = (sz + header - 1) / header * header;

  struct arena_chunk *chunk;
  if (sz > A->sz) {
    /* Too big for a regular slab: give it a dedicated slab tucked behind
     * the head, so the head keeps serving small allocations. */
    chunk = alloc_arena_chunk(sz, A->prev);
    A->prev = chunk;
  } else if (sz > A->sz - A->used) {
    /* Head is full: start a new, larger head slab. */
    chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
    A = chunk;
  } else {
    chunk = A;
  }

  union arena_header *hdr =
      (union arena_header *)((uint8_t *)chunk->ptr + chunk->used);
  chunk->used += sz;
  /* Recorded on every path (including dedicated slabs) so arena_realloc
   * always knows how many bytes to carry over. */
  hdr->size = payload;
  return hdr + 1;
}

/* Returns a fresh allocation of `size` bytes holding the first
 * min(old size, size) bytes of `ptr`; any remaining bytes are zero.
 * `ptr` may be NULL, in which case this is a plain allocation. The old
 * allocation is not released (the arena never reuses memory). */
static void *arena_realloc(void *ptr, size_t size) {
  void *new_ptr = arena_calloc(1, size);
  if (ptr) {
    size_t old_size = ((union arena_header *)ptr)[-1].size;
    /* Never copy more than the new allocation can hold. */
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }
  return new_ptr;
}

/* Individual frees are a no-op; memory is only released by
 * cmark_arena_reset(). */
static void arena_free(void *ptr) {
  (void) ptr;
}
67+
68+
/* Allocator table that routes cmark's allocation hooks to the arena
 * routines defined above. */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};

/* Returns a pointer to the arena allocator table. The parameter list is
 * spelled (void) so the definition is a real prototype, matching
 * cmark_arena_reset(void). */
cmark_mem *cmark_get_arena_mem_allocator(void) {
  return &CMARK_ARENA_MEM_ALLOCATOR;
}

src/blocks.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
108108
}
109109

110110
cmark_parser *cmark_parser_new(int options) {
111-
extern cmark_mem DEFAULT_MEM_ALLOCATOR;
112-
return cmark_parser_new_with_mem(options, &DEFAULT_MEM_ALLOCATOR);
111+
extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
112+
return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR);
113113
}
114114

115115
void cmark_parser_free(cmark_parser *parser) {

src/cmark.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@ static void *xrealloc(void *ptr, size_t size) {
2424
return new_ptr;
2525
}
2626

27-
cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
27+
cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
28+
29+
cmark_mem *cmark_get_default_mem_allocator() {
30+
return &CMARK_DEFAULT_MEM_ALLOCATOR;
31+
}
2832

2933
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
3034
cmark_node *doc;

src/cmark.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,24 @@ typedef struct cmark_mem {
100100
void (*free)(void *);
101101
} cmark_mem;
102102

103+
/** The default memory allocator; uses the system's calloc,
104+
* realloc and free.
105+
*/
106+
CMARK_EXPORT
107+
cmark_mem *cmark_get_default_mem_allocator();
108+
109+
/** An arena allocator; uses system calloc to allocate large
110+
* slabs of memory. Memory in these slabs is not reused at all.
111+
*/
112+
CMARK_EXPORT
113+
cmark_mem *cmark_get_arena_mem_allocator();
114+
115+
/** Resets the arena allocator, quickly returning all used memory
116+
* to the operating system.
117+
*/
118+
CMARK_EXPORT
119+
void cmark_arena_reset(void);
120+
103121
/**
104122
* ## Creating and Destroying Nodes
105123
*/
@@ -502,31 +520,61 @@ cmark_node *cmark_parse_file(FILE *f, int options);
502520
CMARK_EXPORT
503521
char *cmark_render_xml(cmark_node *root, int options);
504522

523+
/** As for 'cmark_render_xml', but specifying the allocator to use for
524+
* the resulting string.
525+
*/
526+
CMARK_EXPORT
527+
char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem);
528+
505529
/** Render a 'node' tree as an HTML fragment. It is up to the user
506530
* to add an appropriate header and footer. It is the caller's
507531
* responsibility to free the returned buffer.
508532
*/
509533
CMARK_EXPORT
510534
char *cmark_render_html(cmark_node *root, int options);
511535

536+
/** As for 'cmark_render_html', but specifying the allocator to use for
537+
* the resulting string.
538+
*/
539+
CMARK_EXPORT
540+
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem);
541+
512542
/** Render a 'node' tree as a groff man page, without the header.
513543
* It is the caller's responsibility to free the returned buffer.
514544
*/
515545
CMARK_EXPORT
516546
char *cmark_render_man(cmark_node *root, int options, int width);
517547

548+
/** As for 'cmark_render_man', but specifying the allocator to use for
549+
* the resulting string.
550+
*/
551+
CMARK_EXPORT
552+
char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
553+
518554
/** Render a 'node' tree as a commonmark document.
519555
* It is the caller's responsibility to free the returned buffer.
520556
*/
521557
CMARK_EXPORT
522558
char *cmark_render_commonmark(cmark_node *root, int options, int width);
523559

560+
/** As for 'cmark_render_commonmark', but specifying the allocator to use for
561+
* the resulting string.
562+
*/
563+
CMARK_EXPORT
564+
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
565+
524566
/** Render a 'node' tree as a LaTeX document.
525567
* It is the caller's responsibility to free the returned buffer.
526568
*/
527569
CMARK_EXPORT
528570
char *cmark_render_latex(cmark_node *root, int options, int width);
529571

572+
/** As for 'cmark_render_latex', but specifying the allocator to use for
573+
* the resulting string.
574+
*/
575+
CMARK_EXPORT
576+
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
577+
530578
/**
531579
* ## Options
532580
*/

src/commonmark.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,14 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
463463
}
464464

465465
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
466+
return cmark_render_commonmark_with_mem(root, options, width, cmark_node_mem(root));
467+
}
468+
469+
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
466470
if (options & CMARK_OPT_HARDBREAKS) {
467471
// disable breaking on width, since it has
468472
// a different meaning with OPT_HARDBREAKS
469473
width = 0;
470474
}
471-
return cmark_render(root, options, width, outc, S_render_node);
475+
return cmark_render(mem, root, options, width, outc, S_render_node);
472476
}

src/html.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
323323
}
324324

325325
char *cmark_render_html(cmark_node *root, int options) {
326+
return cmark_render_html_with_mem(root, options, cmark_node_mem(root));
327+
}
328+
329+
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_mem *mem) {
326330
char *result;
327-
cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root));
331+
cmark_strbuf html = CMARK_BUF_INIT(mem);
328332
cmark_event_type ev_type;
329333
cmark_node *cur;
330334
struct render_state state = {&html, NULL};

src/latex.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,5 +434,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
434434
}
435435

436436
char *cmark_render_latex(cmark_node *root, int options, int width) {
437-
return cmark_render(root, options, width, outc, S_render_node);
437+
return cmark_render_latex_with_mem(root, options, width, cmark_node_mem(root));
438+
}
439+
440+
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
441+
return cmark_render(mem, root, options, width, outc, S_render_node);
438442
}

0 commit comments

Comments
 (0)