Skip to content

Commit 42f4bdd

Browse files
authored
Fix performance regression in schema loading by stripping ruamel.yaml metadata,
avoiding unnecessary copies, and using shallow copies. (#83)
1 parent d21fa84 commit 42f4bdd

File tree

1 file changed

+21
-8
lines changed

1 file changed

+21
-8
lines changed

schema_salad/schema.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,6 @@ def replace_type(items, spec, loader, found):
332332
# type: (Any, Dict[unicode, Any], Loader, Set[unicode]) -> Any
333333
""" Go through and replace types in the 'spec' mapping"""
334334

335-
items = copy.deepcopy(items)
336335
if isinstance(items, dict):
337336
# recursively check these fields for types to replace
338337
if "type" in items and items["type"] in ("record", "enum"):
@@ -342,6 +341,7 @@ def replace_type(items, spec, loader, found):
342341
else:
343342
found.add(items["name"])
344343

344+
items = copy.copy(items)
345345
for n in ("type", "items", "fields"):
346346
if n in items:
347347
items[n] = replace_type(items[n], spec, loader, found)
@@ -388,8 +388,8 @@ def make_valid_avro(items, # type: Avro
388388
union=False # type: bool
389389
):
390390
# type: (...) -> Union[Avro, Dict]
391-
items = copy.deepcopy(items)
392391
if isinstance(items, dict):
392+
items = copy.copy(items)
393393
if items.get("name"):
394394
items["name"] = avro_name(items["name"])
395395

@@ -424,19 +424,31 @@ def make_valid_avro(items, # type: Avro
424424
items = avro_name(items)
425425
return items
426426

427+
def deepcopy_strip(item):  # type: (Any) -> Any
    """Make a deep copy of list and dict objects.

    Intentionally do not copy attributes.  This is to discard CommentedMap and
    CommentedSeq metadata which is very expensive with regular copy.deepcopy.

    Any dict (including dict subclasses) is rebuilt as a plain ``dict`` and
    any list (including list subclasses) as a plain ``list``, recursively.
    Every other value -- strings, numbers, tuples, arbitrary objects -- is
    returned as the very same object, not copied.
    """
    if isinstance(item, dict):
        # Use .items() rather than .iteritems(): the latter only exists on
        # Python 2 dicts, while .items() works on both Python 2 and 3.
        return {k: deepcopy_strip(v) for k, v in item.items()}
    elif isinstance(item, list):
        return [deepcopy_strip(k) for k in item]
    else:
        # Leaf value: shared with the input on purpose.
        return item
427441

428442
def extend_and_specialize(items, loader):
429443
# type: (List[Dict[unicode, Any]], Loader) -> List[Dict[unicode, Any]]
430444
"""Apply 'extend' and 'specialize' to fully materialize derived record
431445
types."""
432446

433-
types = {} # type: Dict[unicode, Any]
434-
for t in items:
435-
types[t["name"]] = t
447+
items = deepcopy_strip(items)
448+
types = {t["name"]: t for t in items} # type: Dict[unicode, Any]
436449
n = []
437450

438451
for t in items:
439-
t = copy.deepcopy(t)
440452
if "extends" in t:
441453
spec = {} # type: Dict[unicode, unicode]
442454
if "specialize" in t:
@@ -450,7 +462,7 @@ def extend_and_specialize(items, loader):
450462
raise Exception("Extends %s in %s refers to invalid base type" % (
451463
t["extends"], t["name"]))
452464

453-
basetype = copy.deepcopy(types[ex])
465+
basetype = copy.copy(types[ex])
454466

455467
if t["type"] == "record":
456468
if spec:
@@ -466,6 +478,7 @@ def extend_and_specialize(items, loader):
466478
exsym.extend(basetype.get("symbols", []))
467479

468480
if t["type"] == "record":
481+
t = copy.copy(t)
469482
exfields.extend(t.get("fields", []))
470483
t["fields"] = exfields
471484

@@ -477,6 +490,7 @@ def extend_and_specialize(items, loader):
477490
else:
478491
fieldnames.add(field["name"])
479492
elif t["type"] == "enum":
493+
t = copy.copy(t)
480494
exsym.extend(t.get("symbols", []))
481495
t["symbol"] = exsym
482496

@@ -507,7 +521,6 @@ def extend_and_specialize(items, loader):
507521

508522
return n
509523

510-
511524
def make_avro_schema(i, # type: List[Dict[unicode, Any]]
512525
loader # type: Loader
513526
):

0 commit comments

Comments
 (0)