Skip to content

Commit ece9727

Browse files
committed
perform ruff lint and format on the files
1 parent 7948940 commit ece9727

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+4072
-2408
lines changed

continuous_integration/scripts/render-template.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
import argparse
44
import json
55
from jinja2 import Environment, FileSystemLoader
6-
import os
76
import re
87

8+
99
# TODO: make this work for arbitrary context, i.e. implement replace_using_context()
1010
def replace_placeholder(source_str, variable_name, variable_value):
1111
# Escaping any regex special characters in variable_name
@@ -14,39 +14,49 @@ def replace_placeholder(source_str, variable_name, variable_value):
1414
# Using regular expression to replace ${variable_name} with actual variable_value
1515
# \s* means any amount of whitespace (including none)
1616
# pattern = rf'\$\{{\s*\{{\s*{variable_name_escaped}\s*\}}\s*\}}'
17-
pattern = rf'<<\s*{variable_name_escaped}\s*>>'
17+
pattern = rf"<<\s*{variable_name_escaped}\s*>>"
1818
return re.sub(pattern, variable_value.strip(), source_str)
1919

20+
2021
# Setup command-line argument parsing
21-
parser = argparse.ArgumentParser(description='Render a Jinja2 template using a JSON context.')
22-
parser.add_argument('template_file', type=str, help='Path to the Jinja2 template file (with .j2 extension).')
23-
parser.add_argument('json_file', type=str, help='Path to the JSON file to use as the rendering context.')
24-
parser.add_argument('output_file', type=str, help='Path to the output file.')
22+
parser = argparse.ArgumentParser(
23+
description="Render a Jinja2 template using a JSON context."
24+
)
25+
parser.add_argument(
26+
"template_file",
27+
type=str,
28+
help="Path to the Jinja2 template file (with .j2 extension).",
29+
)
30+
parser.add_argument(
31+
"json_file", type=str, help="Path to the JSON file to use as the rendering context."
32+
)
33+
parser.add_argument("output_file", type=str, help="Path to the output file.")
2534

2635
args = parser.parse_args()
2736

2837
# Load JSON file as the rendering context
29-
with open(args.json_file, 'r') as file:
38+
with open(args.json_file, "r") as file:
3039
context = json.load(file)
3140

3241
# Setup Jinja2 environment and load the template
3342
env = Environment(
34-
loader=FileSystemLoader(searchpath='./'),
35-
variable_start_string='<<',
36-
variable_end_string='>>',
37-
block_start_string='<%',
38-
block_end_string='%>',
39-
comment_start_string='<#',
40-
comment_end_string='#>')
41-
env.filters['replace_placeholder'] = replace_placeholder
43+
loader=FileSystemLoader(searchpath="./"),
44+
variable_start_string="<<",
45+
variable_end_string=">>",
46+
block_start_string="<%",
47+
block_end_string="%>",
48+
comment_start_string="<#",
49+
comment_end_string="#>",
50+
)
51+
env.filters["replace_placeholder"] = replace_placeholder
4252

4353
template = env.get_template(args.template_file)
4454

4555
# Render the template with the context
4656
rendered_content = template.render(context)
4757
# print(rendered_content)
4858

49-
with open(args.output_file, 'w') as file:
59+
with open(args.output_file, "w") as file:
5060
file.write(rendered_content)
5161

52-
print(f'Template rendered successfully. Output saved to {args.output_file}')
62+
print(f"Template rendered successfully. Output saved to {args.output_file}")

cuda_bindings/benchmarks/kernels.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# this software. Any use, reproduction, disclosure, or distribution of
66
# this software and related documentation outside the terms of the EULA
77
# is strictly prohibited.
8-
kernel_string = '''\
8+
kernel_string = """\
99
#define ITEM_PARAM(x, T) T x
1010
#define REP1(x, T) , ITEM_PARAM(x, T)
1111
#define REP2(x, T) REP1(x##0, T) REP1(x##1, T)
@@ -160,4 +160,4 @@
160160
// Do not touch param to prevent compiler from copying
161161
// the whole structure from const bank to lmem.
162162
}
163-
'''
163+
"""

cuda_bindings/benchmarks/perf_test_utils.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,25 @@
99
from cuda import cuda, cudart, nvrtc
1010
import numpy as np
1111

12+
1213
def ASSERT_DRV(err):
1314
if isinstance(err, cuda.CUresult):
1415
if err != cuda.CUresult.CUDA_SUCCESS:
15-
raise RuntimeError('Cuda Error: {}'.format(err))
16+
raise RuntimeError("Cuda Error: {}".format(err))
1617
elif isinstance(err, cudart.cudaError_t):
1718
if err != cudart.cudaError_t.cudaSuccess:
18-
raise RuntimeError('Cudart Error: {}'.format(err))
19+
raise RuntimeError("Cudart Error: {}".format(err))
1920
elif isinstance(err, nvrtc.nvrtcResult):
2021
if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
21-
raise RuntimeError('Nvrtc Error: {}'.format(err))
22+
raise RuntimeError("Nvrtc Error: {}".format(err))
2223
else:
23-
raise RuntimeError('Unknown error type: {}'.format(err))
24+
raise RuntimeError("Unknown error type: {}".format(err))
25+
2426

2527
@pytest.fixture
2628
def init_cuda():
2729
# Initialize
28-
err, = cuda.cuInit(0)
30+
(err,) = cuda.cuInit(0)
2931
ASSERT_DRV(err)
3032
err, device = cuda.cuDeviceGet(0)
3133
ASSERT_DRV(err)
@@ -38,31 +40,42 @@ def init_cuda():
3840

3941
yield device, ctx, stream
4042

41-
err, = cuda.cuStreamDestroy(stream)
43+
(err,) = cuda.cuStreamDestroy(stream)
4244
ASSERT_DRV(err)
43-
err, = cuda.cuCtxDestroy(ctx)
45+
(err,) = cuda.cuCtxDestroy(ctx)
4446
ASSERT_DRV(err)
4547

48+
4649
@pytest.fixture
4750
def load_module():
4851
module = None
52+
4953
def _load_module(kernel_string, device):
5054
nonlocal module
5155
# Get module
52-
err, major = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device)
56+
err, major = cuda.cuDeviceGetAttribute(
57+
cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device
58+
)
5359
ASSERT_DRV(err)
54-
err, minor = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)
60+
err, minor = cuda.cuDeviceGetAttribute(
61+
cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device
62+
)
5563
ASSERT_DRV(err)
5664

57-
err, prog = nvrtc.nvrtcCreateProgram(str.encode(kernel_string), b'kernelString.cu', 0, [], [])
65+
err, prog = nvrtc.nvrtcCreateProgram(
66+
str.encode(kernel_string), b"kernelString.cu", 0, [], []
67+
)
5868
ASSERT_DRV(err)
59-
opts = [b'--fmad=false', bytes('--gpu-architecture=sm_' + str(major) + str(minor), 'ascii')]
60-
err, = nvrtc.nvrtcCompileProgram(prog, 2, opts)
69+
opts = [
70+
b"--fmad=false",
71+
bytes("--gpu-architecture=sm_" + str(major) + str(minor), "ascii"),
72+
]
73+
(err,) = nvrtc.nvrtcCompileProgram(prog, 2, opts)
6174

6275
err_log, logSize = nvrtc.nvrtcGetProgramLogSize(prog)
6376
ASSERT_DRV(err_log)
64-
log = b' ' * logSize
65-
err_log, = nvrtc.nvrtcGetProgramLog(prog, log)
77+
log = b" " * logSize
78+
(err_log,) = nvrtc.nvrtcGetProgramLog(prog, log)
6679
ASSERT_DRV(err_log)
6780
result = log.decode()
6881
if len(result) > 1:
@@ -71,8 +84,8 @@ def _load_module(kernel_string, device):
7184
ASSERT_DRV(err)
7285
err, cubinSize = nvrtc.nvrtcGetCUBINSize(prog)
7386
ASSERT_DRV(err)
74-
cubin = b' ' * cubinSize
75-
err, = nvrtc.nvrtcGetCUBIN(prog, cubin)
87+
cubin = b" " * cubinSize
88+
(err,) = nvrtc.nvrtcGetCUBIN(prog, cubin)
7689
ASSERT_DRV(err)
7790
cubin = np.char.array(cubin)
7891
err, module = cuda.cuModuleLoadData(cubin)
@@ -82,5 +95,5 @@ def _load_module(kernel_string, device):
8295

8396
yield _load_module
8497

85-
err, = cuda.cuModuleUnload(module)
98+
(err,) = cuda.cuModuleUnload(module)
8699
ASSERT_DRV(err)

cuda_bindings/benchmarks/test_cupy.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,34 +13,38 @@
1313
if not skip_tests:
1414
try:
1515
import cupy
16+
1617
skip_tests = False
1718
except ImportError:
1819
skip_tests = True
1920

2021
from .kernels import kernel_string
2122

23+
2224
def launch(kernel, args=()):
2325
kernel((1,), (1,), args)
2426

27+
2528
# Measure launch latency with no parameters
2629
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
2730
@pytest.mark.benchmark(group="cupy")
2831
def test_launch_latency_empty_kernel(benchmark):
2932
module = cupy.RawModule(code=kernel_string)
30-
kernel = module.get_function('empty_kernel')
33+
kernel = module.get_function("empty_kernel")
3134

3235
stream = cupy.cuda.stream.Stream(non_blocking=True)
3336

3437
with stream:
3538
benchmark(launch, kernel)
3639
stream.synchronize()
3740

41+
3842
# Measure launch latency with a single parameter
3943
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
4044
@pytest.mark.benchmark(group="cupy")
4145
def test_launch_latency_small_kernel(benchmark):
4246
module = cupy.RawModule(code=kernel_string)
43-
kernel = module.get_function('small_kernel')
47+
kernel = module.get_function("small_kernel")
4448
cupy.cuda.set_allocator()
4549
arg = cupy.cuda.alloc(ctypes.sizeof(ctypes.c_float))
4650

@@ -50,12 +54,13 @@ def test_launch_latency_small_kernel(benchmark):
5054
benchmark(launch, kernel, (arg,))
5155
stream.synchronize()
5256

57+
5358
# Measure launch latency with many parameters using builtin parameter packing
5459
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
5560
@pytest.mark.benchmark(group="cupy")
5661
def test_launch_latency_small_kernel_512_args(benchmark):
5762
module = cupy.RawModule(code=kernel_string)
58-
kernel = module.get_function('small_kernel_512_args')
63+
kernel = module.get_function("small_kernel_512_args")
5964
cupy.cuda.set_allocator()
6065

6166
args = []
@@ -69,12 +74,13 @@ def test_launch_latency_small_kernel_512_args(benchmark):
6974
benchmark(launch, kernel, args)
7075
stream.synchronize()
7176

77+
7278
# Measure launch latency with many parameters using builtin parameter packing
7379
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
7480
@pytest.mark.benchmark(group="cupy")
7581
def test_launch_latency_small_kernel_512_bools(benchmark):
7682
module = cupy.RawModule(code=kernel_string)
77-
kernel = module.get_function('small_kernel_512_bools')
83+
kernel = module.get_function("small_kernel_512_bools")
7884
cupy.cuda.set_allocator()
7985

8086
args = [True] * 512
@@ -86,12 +92,13 @@ def test_launch_latency_small_kernel_512_bools(benchmark):
8692
benchmark(launch, kernel, args)
8793
stream.synchronize()
8894

95+
8996
# Measure launch latency with many parameters using builtin parameter packing
9097
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
9198
@pytest.mark.benchmark(group="cupy")
9299
def test_launch_latency_small_kernel_512_doubles(benchmark):
93100
module = cupy.RawModule(code=kernel_string)
94-
kernel = module.get_function('small_kernel_512_doubles')
101+
kernel = module.get_function("small_kernel_512_doubles")
95102
cupy.cuda.set_allocator()
96103

97104
args = [1.2345] * 512
@@ -103,12 +110,13 @@ def test_launch_latency_small_kernel_512_doubles(benchmark):
103110
benchmark(launch, kernel, args)
104111
stream.synchronize()
105112

113+
106114
# Measure launch latency with many parameters using builtin parameter packing
107115
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
108116
@pytest.mark.benchmark(group="cupy")
109117
def test_launch_latency_small_kernel_512_ints(benchmark):
110118
module = cupy.RawModule(code=kernel_string)
111-
kernel = module.get_function('small_kernel_512_ints')
119+
kernel = module.get_function("small_kernel_512_ints")
112120
cupy.cuda.set_allocator()
113121

114122
args = [123] * 512
@@ -120,12 +128,13 @@ def test_launch_latency_small_kernel_512_ints(benchmark):
120128
benchmark(launch, kernel, args)
121129
stream.synchronize()
122130

131+
123132
# Measure launch latency with many parameters using builtin parameter packing
124133
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
125134
@pytest.mark.benchmark(group="cupy")
126135
def test_launch_latency_small_kernel_512_bytes(benchmark):
127136
module = cupy.RawModule(code=kernel_string)
128-
kernel = module.get_function('small_kernel_512_chars')
137+
kernel = module.get_function("small_kernel_512_chars")
129138
cupy.cuda.set_allocator()
130139

131140
args = [127] * 512
@@ -137,12 +146,13 @@ def test_launch_latency_small_kernel_512_bytes(benchmark):
137146
benchmark(launch, kernel, args)
138147
stream.synchronize()
139148

149+
140150
# Measure launch latency with many parameters using builtin parameter packing
141151
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
142152
@pytest.mark.benchmark(group="cupy")
143153
def test_launch_latency_small_kernel_512_longlongs(benchmark):
144154
module = cupy.RawModule(code=kernel_string)
145-
kernel = module.get_function('small_kernel_512_longlongs')
155+
kernel = module.get_function("small_kernel_512_longlongs")
146156
cupy.cuda.set_allocator()
147157

148158
args = [9223372036854775806] * 512
@@ -154,12 +164,13 @@ def test_launch_latency_small_kernel_512_longlongs(benchmark):
154164
benchmark(launch, kernel, args)
155165
stream.synchronize()
156166

167+
157168
# Measure launch latency with many parameters using builtin parameter packing
158169
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
159170
@pytest.mark.benchmark(group="cupy")
160171
def test_launch_latency_small_kernel_256_args(benchmark):
161172
module = cupy.RawModule(code=kernel_string)
162-
kernel = module.get_function('small_kernel_256_args')
173+
kernel = module.get_function("small_kernel_256_args")
163174
cupy.cuda.set_allocator()
164175

165176
args = []
@@ -173,12 +184,13 @@ def test_launch_latency_small_kernel_256_args(benchmark):
173184
benchmark(launch, kernel, args)
174185
stream.synchronize()
175186

187+
176188
# Measure launch latency with many parameters using builtin parameter packing
177189
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
178190
@pytest.mark.benchmark(group="cupy")
179191
def test_launch_latency_small_kernel_16_args(benchmark):
180192
module = cupy.RawModule(code=kernel_string)
181-
kernel = module.get_function('small_kernel_16_args')
193+
kernel = module.get_function("small_kernel_16_args")
182194
cupy.cuda.set_allocator()
183195

184196
args = []

0 commit comments

Comments
 (0)