Skip to content

Commit f674e0f

Browse files
committed
Merge remote-tracking branch 'origin/main' into tiktoken
2 parents f4f1f80 + 90d7d07 commit f674e0f

File tree

3 files changed

+26
-9
lines changed

3 files changed

+26
-9
lines changed

backends/vulkan/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ list(APPEND VULKAN_CXX_FLAGS "-DUSE_VULKAN_VOLK")
6161
# vulkan API files
6262

6363
file(GLOB_RECURSE vulkan_api_cpp ${RUNTIME_PATH}/api/*)
64+
file(GLOB_RECURSE vulkan_vkapi_cpp ${RUNTIME_PATH}/vk_api/*)
65+
list(APPEND vulkan_api_cpp ${vulkan_vkapi_cpp})
6466
list(APPEND vulkan_api_cpp ${VOLK_PATH}/volk.c)
6567

6668
# vulkan ComputeGraph files

backends/vulkan/runtime/gen_vulkan_spv.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import re
1616
import sys
1717
from itertools import product
18+
from multiprocessing.pool import ThreadPool
1819

1920
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
2021
import subprocess
@@ -620,9 +621,12 @@ def constructOutputMap(self) -> None:
620621

621622
def generateSPV(self, output_dir: str) -> Dict[str, str]:
622623
output_file_map = {}
623-
for shader_name in self.output_shader_map:
624-
source_glsl = self.output_shader_map[shader_name][0]
625-
shader_params = self.output_shader_map[shader_name][1]
624+
625+
def process_shader(shader_paths_pair):
626+
shader_name = shader_paths_pair[0]
627+
628+
source_glsl = shader_paths_pair[1][0]
629+
shader_params = shader_paths_pair[1][1]
626630

627631
with codecs.open(source_glsl, "r", encoding="utf-8") as input_file:
628632
input_text = input_file.read()
@@ -652,9 +656,15 @@ def generateSPV(self, output_dir: str) -> Dict[str, str]:
652656
]
653657

654658
print("glslc cmd:", cmd)
655-
# pyre-ignore
656659
subprocess.check_call(cmd)
657660

661+
return (spv_out_path, glsl_out_path)
662+
663+
# Parallelize shader compilation as much as possible to optimize build time.
664+
with ThreadPool(os.cpu_count()) as pool:
665+
for spv_out_path, glsl_out_path in pool.map(
666+
process_shader, self.output_shader_map.items()
667+
):
658668
output_file_map[spv_out_path] = glsl_out_path
659669

660670
return output_file_map

exir/lowered_backend_module.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,15 @@ def buffer(
139139
segment_alignment: int = 4096,
140140
constant_tensor_alignment: Optional[int] = None,
141141
delegate_alignment: Optional[int] = None,
142+
memory_planning: MemoryPlanningPass = None,
142143
) -> bytes:
143144
"""
144145
Returns a buffer containing the serialized ExecuTorch binary.
145146
"""
146147
# TODO(T181463742): avoid calling bytes(..) which incurs large copies.
147148
out = bytes(
148149
_serialize_pte_binary(
149-
program=self.program(),
150+
program=self.program(memory_planning=memory_planning),
150151
extract_delegate_segments=extract_delegate_segments,
151152
segment_alignment=segment_alignment,
152153
constant_tensor_alignment=constant_tensor_alignment,
@@ -157,7 +158,11 @@ def buffer(
157158

158159
# TODO(chenlai): re-consider recapture instead of manually constructing the program because
159160
# the metadata construction is done manually.
160-
def program(self, emit_stacktrace: bool = False) -> Program:
161+
def program(
162+
self,
163+
emit_stacktrace: bool = False,
164+
memory_planning: MemoryPlanningPass = None,
165+
) -> Program:
161166
# Fix: autodeps introduces cyclic dependencies:
162167
# program -> verifier -> lowered_backend_module -> program
163168
# @manual
@@ -319,9 +324,9 @@ def program(self, emit_stacktrace: bool = False) -> Program:
319324
example_inputs=None,
320325
verifier=lowered_exported_program.verifier,
321326
)
322-
exported_program = _transform(
323-
exported_program, SpecPropPass(), MemoryPlanningPass("greedy")
324-
)
327+
if memory_planning is None:
328+
memory_planning = MemoryPlanningPass("greedy")
329+
exported_program = _transform(exported_program, SpecPropPass(), memory_planning)
325330
emitted_program = emit_program(
326331
exported_program, emit_stacktrace=emit_stacktrace
327332
).program

0 commit comments

Comments
 (0)