11
11
import modal .io_streams
12
12
from enum import StrEnum , auto
13
13
from pathlib import Path
14
+ import time
14
15
from typing import Optional , Type
15
16
from types import TracebackType
16
17
27
28
create_container ,
28
29
copy_from_container ,
29
30
copy_to_container ,
30
- delete_file_from_container ,
31
31
exec_run_with_timeout ,
32
32
)
33
33
@@ -51,43 +51,29 @@ def __init__(
51
51
spec : Spec ,
52
52
logger : logging .Logger ,
53
53
timeout : int ,
54
+ num_cpus : int ,
55
+ log_dir : Path ,
56
+ files_to_copy : Optional [Files ] = None ,
54
57
):
55
58
"""Create the remote execution context
56
59
57
- The execution context will persist for the lifetime of this object.
58
60
The execution context can be a Docker container or Modal sandbox.
61
+ The execution context may not persist for the lifetime of this object.
59
62
"""
60
63
self .spec = spec
61
64
self .logger = logger
62
65
self .timeout = timeout
66
+ self .num_cpus = num_cpus
67
+ self .log_dir = log_dir
63
68
64
69
@abstractmethod
def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
    """Execute a test command in the remote execution context.

    Concrete execution contexts must override this method; the base
    implementation only signals that no backend is available.

    Args:
        command: The command line to run remotely.

    Returns:
        A ``(str, bool, float)`` tuple as declared by the signature.

    Raises:
        NotImplementedError: Always, when not overridden.
    """
    raise NotImplementedError()
85
73
86
- def write_test_output (
87
- self , log_dir : Path , test_output : str , timed_out : bool
88
- ) -> None :
74
+ def write_test_output (self , test_output : str , timed_out : bool ) -> None :
89
75
"""Write test output"""
90
- test_output_path = log_dir / "test_output.txt"
76
+ test_output_path = self . log_dir / "test_output.txt"
91
77
with open (test_output_path , "w" ) as f :
92
78
f .write (test_output )
93
79
if timed_out :
@@ -98,15 +84,6 @@ def write_test_output(
98
84
self .logger ,
99
85
)
100
86
101
- # copy back report.json if there is any
102
- report_file = Path (self .spec .repo_directory ) / "report.json"
103
- # Run the test command inside the container to check if the file exists
104
- exit_code , output = self .exec_run (f"test -e { report_file } " )
105
- # Check the exit code of the command
106
- if exit_code == 0 :
107
- self .copy_from_remote (report_file , log_dir / "report.json" )
108
- self .delete_file_from_remote (report_file )
109
-
110
87
def __enter__(self):
    """Enter the ``with`` block and hand back this execution context."""
    return self
112
89
@@ -126,39 +103,41 @@ def __init__(
126
103
spec : Spec ,
127
104
logger : logging .Logger ,
128
105
timeout : int ,
106
+ num_cpus : int ,
107
+ log_dir : Path ,
129
108
files_to_copy : Optional [Files ] = None ,
130
109
):
131
- super ().__init__ (spec , logger , timeout )
110
+ super ().__init__ (spec , logger , timeout , num_cpus , log_dir )
132
111
133
112
self .client = docker .from_env ()
134
113
self .container = create_container (
135
114
client = self .client ,
136
115
image_name = spec .repo_image_key ,
137
116
container_name = spec .get_container_name (),
117
+ nano_cpus = num_cpus ,
138
118
logger = logger ,
139
119
)
140
120
self .container .start ()
141
121
if files_to_copy :
142
122
for _ , f in files_to_copy .items ():
143
123
copy_to_container (self .container , f ["src" ], f ["dest" ]) # type: ignore
144
124
145
- def exec_run_with_timeout (
146
- self , command : str , timeout : int
147
- ) -> tuple [str , bool , float ]:
148
- """Exec"""
149
- return exec_run_with_timeout (self .container , command , timeout )
150
-
151
- def exec_run (self , command : str ) -> tuple [int , str ]:
125
def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
    """Run ``command`` in the Docker container and fetch ``report.json``.

    The command is executed through the imported ``exec_run_with_timeout``
    helper using ``self.timeout``. Afterwards, if ``report.json`` exists in
    ``self.spec.repo_directory`` inside the container, it is copied to
    ``self.log_dir / "report.json"``; a missing report is not an error.

    Args:
        command: The command line to run inside the container.

    Returns:
        The helper's result, passed through unchanged.
    """
    output = exec_run_with_timeout(self.container, command, self.timeout)

    # Copy back report.json if there is any.
    report_file = Path(self.spec.repo_directory) / "report.json"
    # `test -e` exits with 0 only when the path exists; the command's own
    # output carries no information, so it is discarded.
    exit_code, _ = self.container.exec_run(
        f"test -e {report_file}", demux=True
    )
    if exit_code == 0:
        copy_from_container(
            self.container, report_file, self.log_dir / "report.json"
        )
    return output
162
141
163
142
def __exit__ (
164
143
self ,
@@ -176,64 +155,67 @@ def __init__(
176
155
spec : Spec ,
177
156
logger : logging .Logger ,
178
157
timeout : int ,
158
+ num_cpus : int ,
159
+ log_dir : Path ,
179
160
files_to_copy : Optional [Files ] = None ,
180
161
):
181
- super ().__init__ (spec , logger , timeout )
162
+ super ().__init__ (spec , logger , timeout , num_cpus , log_dir )
163
+
164
+ self .app = modal .App ()
182
165
183
166
# the image must exist on dockerhub
184
167
reponame = spec .repo .split ("/" )[- 1 ]
185
- image_name = f"wentingzhao/{ reponame } "
168
+ image_name = f"wentingzhao/{ reponame } :latest "
186
169
image = modal .Image .from_registry (image_name )
187
170
if files_to_copy :
188
171
for _ , f in files_to_copy .items ():
189
172
image = image .copy_local_file (f ["src" ], f ["dest" ]) # type: ignore
173
+ self .image = image
190
174
191
- self .sandbox = modal .Sandbox .create (
192
- "sleep" ,
193
- "infinity" ,
194
- image = image ,
195
- cpu = 4.0 ,
196
- timeout = timeout ,
197
- )
198
-
199
- def exec_run_with_timeout (
200
- self , command : str , timeout : int
201
- ) -> tuple [str , bool , float ]:
202
- """Execute command on modal sandbox"""
203
- print ("Executing:" , command )
204
- process = self .sandbox .exec ("bash" , "-c" , command )
205
- print ("stdout" )
206
- stdout = read_stream (process .stdout )
207
- print ("stderr" )
208
- stderr = read_stream (process .stderr )
209
- print (stderr )
210
- return stdout , False , 1.0
211
- return stdout , stderr
212
-
213
- def exec_run (self , command : str ) -> tuple [int , str ]:
175
def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
    """Execute ``command`` in a fresh Modal sandbox and collect its results.

    A new sandbox is created for every call with an ephemeral volume mounted
    at ``/vol``. The sandbox runs ``command`` and, only if it succeeds, copies
    ``report.json`` from ``self.spec.repo_directory`` into the volume. Once
    the sandbox has finished, the report (when present) is written to
    ``self.log_dir / "report.json"``.

    Args:
        command: The shell command to run via ``bash -c``.

    Returns:
        A tuple ``(output, timed_out, elapsed_seconds)`` where ``output`` is
        the text read from the sandbox's stderr stream, ``timed_out`` is
        ``True`` when the sandbox return code is 124, and ``elapsed_seconds``
        is the wall-clock duration of the whole call.
    """
    start_time = time.time()
    with modal.Volume.ephemeral() as vol:
        report_file = Path(self.spec.repo_directory) / "report.json"

        self.sandbox = modal.Sandbox.create(
            "bash",
            "-c",
            f"{command} && cp {str(report_file)} /vol/report.json",
            image=self.image,
            cpu=self.num_cpus,
            timeout=self.timeout,
            app=self.app,
            volumes={"/vol": vol},
        )
        try:
            self.sandbox.wait()

            # stdout has been redirected to stderr
            stdout = read_stream(self.sandbox.stderr)

            # Modal reports a sandbox timeout through return code 124:
            # https://github.com/modal-labs/modal-client/blob/d577b2916b5c3bf4ebbcb58fadced84d85e1cf8c/modal/sandbox.py#L413
            timed_out = self.sandbox.returncode == 124

            # The `cp` above only runs when the command succeeds, so the
            # report may legitimately be absent (failed or timed-out run).
            # Read it fully before opening the local file so a missing
            # report does not leave an empty report.json behind.
            try:
                report_chunks = list(vol.read_file("report.json"))
            except FileNotFoundError:
                report_chunks = None
                self.logger.info("No report.json was produced in the sandbox")

            if report_chunks is not None:
                with (self.log_dir / "report.json").open("wb") as f:
                    f.writelines(report_chunks)
        finally:
            # Always release the sandbox, even when collecting results fails.
            self.sandbox.terminate()

    end_time = time.time()

    return stdout, timed_out, end_time - start_time
231
214
232
215
def __exit__(
    self,
    exctype: Optional[Type[BaseException]],
    excinst: Optional[BaseException],
    exctb: Optional[TracebackType],
) -> None:
    """Leave the ``with`` block by closing this context's logger.

    The exception arguments are accepted for the context-manager protocol
    but are not inspected; returning ``None`` lets any in-flight exception
    propagate to the caller.
    """
    close_logger(self.logger)
0 commit comments