Skip to content

Commit 5bc4a15

Browse files
add more permission capabilities to PythonInterpreter (#8296)
* add more permission capabilities to PythonInterpreter * update tests * update file access/write tests * resolve comments * resolve comments * update flags and interpreter usage * resolve comments
1 parent eed2f8c commit 5bc4a15

File tree

5 files changed

+260
-14
lines changed

5 files changed

+260
-14
lines changed

dspy/predict/code_act.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import inspect
2+
from typing import Callable, Union, Type, Optional
23
import logging
34
from inspect import Signature
4-
from typing import Callable, Type, Union
55

66
import dspy
77
from dspy.adapters.types.tool import Tool
@@ -17,15 +17,15 @@ class CodeAct(ReAct, ProgramOfThought):
1717
CodeAct is a module that utilizes the Code Interpreter and predefined tools to solve the problem.
1818
"""
1919

20-
def __init__(self, signature: Union[str, Type[Signature]], tools: list[Callable], max_iters: int = 5):
20+
def __init__(self, signature: Union[str, Type[Signature]], tools: list[Callable], max_iters: int = 5, interpreter: Optional[PythonInterpreter] = None):
2121
"""
2222
Initializes the CodeAct class with the specified signature, tools, iteration limit, and optional interpreter.
2323
2424
Args:
2525
signature (Union[str, Type[Signature]]): The signature of the module.
2626
tools (list[Callable]): The tool callables to be used. CodeAct only accepts functions and not callable objects.
2727
max_iters (int): The maximum number of iterations to generate the answer.
28-
28+
interpreter (Optional[PythonInterpreter]): PythonInterpreter instance to use. If None, a new one is instantiated.
2929
Example:
3030
```python
3131
from dspy.predict import CodeAct
@@ -67,8 +67,8 @@ def factorial(n):
6767
self.codeact = dspy.Predict(codeact_signature)
6868
self.extractor = dspy.ChainOfThought(extract_signature)
6969
# This raises an exception if dspy cannot find an available deno instance.
70-
self.interpreter = PythonInterpreter()
71-
70+
self.interpreter = interpreter or PythonInterpreter()
71+
7272
def _build_instructions(self, signature, tools):
7373
instructions = [f"{signature.instructions}\n"] if signature.instructions else []
7474
inputs = ", ".join([f"`{k}`" for k in signature.input_fields.keys()])

dspy/predict/program_of_thought.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
22
import logging
33
import re
4-
from typing import Type, Union
4+
from typing import Type, Union, Optional
55

66
import dspy
77
from dspy.primitives.program import Module
@@ -27,11 +27,12 @@ class ProgramOfThought(Module):
2727
```
2828
"""
2929

30-
def __init__(self, signature: Union[str, Type[Signature]], max_iters=3):
30+
def __init__(self, signature: Union[str, Type[Signature]], max_iters=3, interpreter: Optional[PythonInterpreter] = None):
3131
"""
3232
Args:
3333
signature: The signature of the module.
3434
max_iters: The maximum number of iterations to retry code generation and execution.
35+
interpreter: PythonInterpreter instance to use. If None, a new one is instantiated.
3536
"""
3637
super().__init__()
3738
self.signature = signature = ensure_signature(signature)
@@ -59,7 +60,7 @@ def __init__(self, signature: Union[str, Type[Signature]], max_iters=3):
5960
),
6061
)
6162
# This raises an exception if dspy cannot find an available deno instance.
62-
self.interpreter = PythonInterpreter()
63+
self.interpreter = interpreter or PythonInterpreter()
6364

6465
def _generate_signature(self, mode):
6566
signature_dict = dict(self.input_fields)

dspy/primitives/python_interpreter.py

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
import os
33
import subprocess
44
from types import TracebackType
5-
from typing import Any, Dict, List, Optional
5+
from typing import Any, Dict, List, Optional, Union
6+
from os import PathLike
67

78

89
class InterpreterError(RuntimeError):
@@ -24,16 +25,99 @@ class PythonInterpreter:
2425
```
2526
"""
2627

27-
def __init__(self, deno_command: Optional[List[str]] = None) -> None:
28+
def __init__(
    self,
    deno_command: Optional[List[str]] = None,
    enable_read_paths: Optional[List[Union[PathLike, str]]] = None,
    enable_write_paths: Optional[List[Union[PathLike, str]]] = None,
    enable_env_vars: Optional[List[str]] = None,
    enable_network_access: Optional[List[str]] = None,
    sync_files: bool = True,
) -> None:
    """
    Store the sandbox permissions and build the command used to launch Deno.

    Args:
        deno_command: command list to launch Deno. When provided it is copied and used as-is;
            none of the permission flags derived from the other arguments are added to it.
        enable_read_paths: Files or directories to allow reading from in the sandbox.
        enable_write_paths: Files or directories to allow writing to in the sandbox.
        enable_env_vars: Environment variable names to allow in the sandbox. Names are
            stripped of surrounding whitespace and blank names are ignored.
        enable_network_access: Domains or IPs to allow network access in the sandbox.
        sync_files: If set, syncs changes within the sandbox back to original files after execution.
    """
    if isinstance(deno_command, dict):
        deno_command = None  # no-op, just a guard in case someone passes a dict

    self.enable_read_paths = enable_read_paths or []
    self.enable_write_paths = enable_write_paths or []
    self.enable_env_vars = enable_env_vars or []
    self.enable_network_access = enable_network_access or []
    self.sync_files = sync_files
    # TODO later on add enable_run (--allow-run) by proxying subprocess.run through Deno.run() to fix 'emscripten does not support processes' error

    # Defined on every code path so the attribute exists even with a custom deno_command.
    self._env_arg = ""

    if deno_command:
        self.deno_command = list(deno_command)
    else:
        # NOTE: --allow-read is passed without a path list; enable_read_paths is not used here.
        args = ["deno", "run", "--allow-read"]

        # Drop blank names: they would otherwise produce a bare "--allow-env=" flag
        # and an empty trailing argument for the runner.
        user_vars = [name for name in (str(v).strip() for v in self.enable_env_vars) if name]
        if user_vars:
            self._env_arg = ",".join(user_vars)
            args.append("--allow-env=" + self._env_arg)
        if self.enable_network_access:
            args.append(f"--allow-net={','.join(str(x) for x in self.enable_network_access)}")
        if self.enable_write_paths:
            args.append(f"--allow-write={','.join(str(x) for x in self.enable_write_paths)}")

        args.append(self._get_runner_path())

        # For runner.js to load in env vars: the names are passed as its first script argument.
        if self._env_arg:
            args.append(self._env_arg)
        self.deno_command = args

    self.deno_process = None
    self._mounted_files = False
3279

3380
def _get_runner_path(self) -> str:
    """Return the path of ``runner.js`` located in the same directory as this module."""
    this_file = os.path.abspath(__file__)
    return os.path.join(os.path.dirname(this_file), "runner.js")
3683

84+
def _mount_files(self):
    """
    Ask the Deno runner to mount every allowed read/write path into the sandbox.

    Each path is announced with a JSON line ``{"mount_file": <host path>, "virtual_path":
    "/sandbox/<basename>"}`` written to the runner's stdin. The work is done once per
    interpreter: after a successful call ``self._mounted_files`` is set and later calls return
    immediately.

    A missing path that is listed in ``enable_write_paths`` is created as an empty file; a
    missing read-only path is an error. All paths are checked before anything is sent, so a
    failure leaves the runner untouched.

    Raises:
        FileNotFoundError: if a read-only path does not exist on the host.
    """
    if self._mounted_files:
        return

    read_paths = self.enable_read_paths or []
    write_paths = self.enable_write_paths or []
    requested = [*read_paths, *write_paths]
    if not requested:
        return

    messages = []
    seen = set()
    for path in requested:
        if not path:
            continue
        host_path = str(path)
        # A path present in both lists (or repeated) only needs to be mounted once.
        if host_path in seen:
            continue
        seen.add(host_path)

        if not os.path.exists(path):
            if path not in write_paths:
                raise FileNotFoundError(f"Cannot mount non-existent file: {path}")
            # Writable targets may not exist yet: create an empty placeholder to mount.
            with open(path, "a"):
                pass

        # NOTE(review): paths sharing a basename map to the same virtual path; the later
        # mount request would then target the same sandbox file.
        virtual_path = f"/sandbox/{os.path.basename(path)}"
        messages.append(json.dumps({"mount_file": host_path, "virtual_path": virtual_path}) + "\n")

    if messages:
        stdin = self.deno_process.stdin
        stdin.write("".join(messages))
        stdin.flush()
    self._mounted_files = True
107+
108+
def _sync_files(self):
    """
    Ask the Deno runner to copy sandbox files back to their host locations.

    Does nothing when ``sync_files`` is disabled or no write paths are configured. Otherwise
    one JSON line ``{"sync_file": "/sandbox/<basename>", "host_file": <host path>}`` is written
    to the runner's stdin per distinct write path. Empty entries are skipped, matching what
    ``_mount_files`` does when mounting.
    """
    if not self.enable_write_paths or not self.sync_files:
        return

    requests = []
    seen = set()
    for path in self.enable_write_paths:
        if not path:
            continue
        host_file = str(path)
        # Repeated entries would only trigger the same copy again.
        if host_file in seen:
            continue
        seen.add(host_file)
        request = {"sync_file": f"/sandbox/{os.path.basename(path)}", "host_file": host_file}
        requests.append(json.dumps(request) + "\n")

    if requests:
        stdin = self.deno_process.stdin
        stdin.write("".join(requests))
        stdin.flush()
119+
120+
37121
def _ensure_deno_process(self) -> None:
38122
if self.deno_process is None or self.deno_process.poll() is not None:
39123
try:
@@ -43,6 +127,7 @@ def _ensure_deno_process(self) -> None:
43127
stdout=subprocess.PIPE,
44128
stderr=subprocess.PIPE,
45129
text=True,
130+
env=os.environ.copy()
46131
)
47132
except FileNotFoundError as e:
48133
install_instructions = (
@@ -87,6 +172,7 @@ def execute(
87172
variables = variables or {}
88173
code = self._inject_variables(code, variables)
89174
self._ensure_deno_process()
175+
self._mount_files()
90176

91177
# Send the code as JSON
92178
input_data = json.dumps({"code": code})
@@ -127,6 +213,7 @@ def execute(
127213
raise InterpreterError(f"{error_type}: {result.get('errorArgs') or error_msg}")
128214

129215
# If there's no error or got `FinalAnswer`, return the "output" field
216+
self._sync_files()
130217
return result.get("output", None)
131218

132219
def __enter__(self):

dspy/primitives/runner.js

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,21 @@ import { readLines } from "https://deno.land/std@0.186.0/io/mod.ts";
55

66
const pyodide = await pyodideModule.loadPyodide();
77

8+
try {
  // The first script argument, when present, is a comma-separated list of
  // environment variable names to copy from the Deno process into Pyodide.
  const requestedNames = (Deno.args[0] ?? "").split(",").filter(Boolean);
  requestedNames.forEach((name) => {
    const value = Deno.env.get(name);
    if (value === undefined) {
      return; // variable is not set on the host; nothing to copy
    }
    // JSON.stringify yields quoted, escaped literals for the name and value.
    pyodide.runPython(`
import os
os.environ[${JSON.stringify(name)}] = ${JSON.stringify(value)}
`);
  });
} catch (err) {
  console.error("Error setting environment variables in Pyodide:", err);
}
22+
823
for await (const line of readLines(Deno.stdin)) {
924
let input;
1025
try {
@@ -17,6 +32,43 @@ for await (const line of readLines(Deno.stdin)) {
1732
continue;
1833
}
1934

35+
if (input.mount_file) {
36+
const hostPath = input.mount_file;
37+
const virtualPath = input.virtual_path || hostPath;
38+
try {
39+
const contents = await Deno.readFile(hostPath);
40+
const dirs = virtualPath.split('/').slice(1, -1);
41+
let cur = '';
42+
for (const d of dirs) {
43+
cur += '/' + d;
44+
try {
45+
pyodide.FS.mkdir(cur);
46+
} catch (e) {
47+
if (!(e && e.message && e.message.includes('File exists'))) {
48+
console.log("[DEBUG] Error creating directory in Pyodide file system:", cur, "|", e.message);
49+
}
50+
}
51+
}
52+
pyodide.FS.writeFile(virtualPath, contents);
53+
} catch (e) {
54+
console.log(JSON.stringify({error: "Failed to mount file: " + e.message}));
55+
}
56+
continue;
57+
}
58+
59+
if (input.sync_file) {
60+
const virtualPath = input.sync_file;
61+
const hostPath = input.host_file || virtualPath;
62+
try {
63+
const contents = pyodide.FS.readFile(virtualPath);
64+
await Deno.writeFile(hostPath, contents);
65+
} catch (e) {
66+
console.log("[DEBUG] Failed to sync file:", hostPath, "|", e.message);
67+
}
68+
continue;
69+
}
70+
71+
2072
// Expecting an object like { "code": "...", ... }
2173
if (typeof input !== 'object' || input === null) {
2274
console.log(JSON.stringify({

tests/primitives/test_python_interpreter.py

Lines changed: 109 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import random
22
import shutil
3-
3+
import os
44
import pytest
5-
65
from dspy.primitives.python_interpreter import InterpreterError, PythonInterpreter
76

87
# This test suite requires deno to be installed. Please install deno following https://docs.deno.com/runtime/getting_started/installation/
@@ -59,5 +58,112 @@ def test_final_answer_trick():
5958
code = f"final_answer('The result is', {token})"
6059
result = interpreter(code)
6160

62-
# They should matain the same order
61+
# They should maintain the same order
6362
assert result == ["The result is", token], "The returned results are differ, `final_answer` trick doesn't work"
63+
64+
def test_enable_env_vars_flag():
    """Host environment variables are visible in the sandbox only when explicitly enabled."""
    # Remember the previous value so the test does not leak FOO_TEST_ENV into other tests.
    previous = os.environ.get("FOO_TEST_ENV")
    os.environ["FOO_TEST_ENV"] = "test_value"
    try:
        code = "import os\nresult = os.getenv('FOO_TEST_ENV')\nresult"

        with PythonInterpreter(enable_env_vars=None) as interpreter:
            result = interpreter.execute(code)
            assert result == "", "Environment variables should be inaccessible without allow-env"

        with PythonInterpreter(enable_env_vars=["FOO_TEST_ENV"]) as interpreter:
            result = interpreter.execute(code)
            assert result == "test_value", "Environment variables should be accessible with allow-env"
    finally:
        if previous is None:
            os.environ.pop("FOO_TEST_ENV", None)
        else:
            os.environ["FOO_TEST_ENV"] = previous
76+
77+
78+
79+
def test_read_file_access_control(tmp_path):
    """A file listed in enable_read_paths is readable in the sandbox; without it, it is not."""
    testfile_path = tmp_path / "test_temp_file.txt"
    with open(testfile_path, "w") as handle:
        handle.write("test content")
    quoted_virtual_path = repr(f"/sandbox/{testfile_path.name}")

    # Plain read of the mounted file.
    read_code = "\n".join(
        [
            f"with open({quoted_virtual_path}, 'r') as f:",
            "    data = f.read()",
            "data",
        ]
    )
    with PythonInterpreter(enable_read_paths=[str(testfile_path)]) as interpreter:
        result = interpreter.execute(read_code)
        assert result == "test content", "Test file should be accessible with enable_read_paths and specified file"

    # Same read, but any failure is captured and returned as text.
    guarded_read_code = "\n".join(
        [
            "try:",
            f"    with open({quoted_virtual_path}, 'r') as f:",
            "        data = f.read()",
            "except Exception as e:",
            "    data = str(e)",
            "data",
        ]
    )
    with PythonInterpreter(enable_read_paths=None) as interpreter:
        result = interpreter.execute(guarded_read_code)
        lowered = result.lower()
        assert ("PermissionDenied" in result or "denied" in lowered or "no such file" in lowered), "Test file should not be accessible without enable_read_paths"
105+
106+
def test_enable_write_flag(tmp_path):
    """Writes reach the host file only with enable_write_paths, and only when sync_files is on."""
    testfile_path = tmp_path / "test_temp_output.txt"
    quoted_virtual_path = repr(f"/sandbox/{testfile_path.name}")

    # 1) No write paths: the write attempt must fail inside the sandbox.
    blocked_code = "\n".join(
        [
            "try:",
            f"    with open({quoted_virtual_path}, 'w') as f:",
            "        f.write('blocked')",
            "    result = 'wrote'",
            "except Exception as e:",
            "    result = str(e)",
            "result",
        ]
    )
    with PythonInterpreter(enable_write_paths=None) as interpreter:
        result = interpreter.execute(blocked_code)
        lowered = result.lower()
        assert ("PermissionDenied" in result or "denied" in lowered or "no such file" in lowered), "Test file should not be writable without enable_write_paths"

    # 2) Write path enabled: the content must be synced back to the host file.
    allowed_code = "\n".join(
        [
            f"with open({quoted_virtual_path}, 'w') as f:",
            "    f.write('allowed')",
            "'ok'",
        ]
    )
    with PythonInterpreter(enable_write_paths=[str(testfile_path)]) as interpreter:
        result = interpreter.execute(allowed_code)
        assert result == "ok", "Test file should be writable with enable_write_paths"
        assert testfile_path.exists()
        with open(testfile_path, "r") as f:
            assert f.read() == "allowed", "Test file outputs should match content written during execution"

    # 3) sync_files=False: the sandbox write must leave the host file untouched.
    with open(testfile_path, "w") as f:
        f.write("original_content")
    no_sync_code = "\n".join(
        [
            f"with open({quoted_virtual_path}, 'w') as f:",
            "    f.write('should_not_sync')",
            "'done_no_sync'",
        ]
    )
    with PythonInterpreter(enable_write_paths=[str(testfile_path)], sync_files=False) as interpreter:
        result = interpreter.execute(no_sync_code)
        assert result == "done_no_sync"
        with open(testfile_path, "r") as f:
            assert f.read() == "original_content", "File should not be changed when sync_files is False"
147+
148+
149+
150+
def test_enable_net_flag():
    """A fetch from the sandbox fails without network access and succeeds for an allowed host."""
    test_url = "https://example.com"
    # The same snippet is executed under both permission settings.
    fetch_code = "\n".join(
        [
            "import js",
            f"resp = await js.fetch({repr(test_url)})",
            "resp.status",
        ]
    )

    with PythonInterpreter(enable_network_access=None) as interpreter:
        with pytest.raises(InterpreterError, match="PythonError"):
            interpreter.execute(fetch_code)

    with PythonInterpreter(enable_network_access=["example.com"]) as interpreter:
        status = interpreter.execute(fetch_code)
        assert int(status) == 200, "Network access is permitted with enable_network_access"

0 commit comments

Comments
 (0)