Skip to content

Commit 3240904

Browse files
author
codegen-bot
committed
.
1 parent b23e513 commit 3240904

File tree

3 files changed

+257
-0
lines changed

3 files changed

+257
-0
lines changed

src/codegen/sdk/core/codebase.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,119 @@ def from_repo(
13111311
logger.exception(f"Failed to initialize codebase: {e}")
13121312
raise
13131313

1314+
@classmethod
def from_string(
    cls,
    code: str,
    *,
    language: Literal["python", "typescript"] | ProgrammingLanguage,
) -> "Codebase":
    """Build a single-file Codebase from a string of source code.

    The code is handed to ``CodebaseFactory.get_codebase_from_files`` as one
    file: ``test.ts`` when the language is TypeScript, ``test.py`` for every
    other language. A fresh temporary directory (prefix ``codegen_``) is used
    as the repo path; this method does not remove that directory afterwards.

    Args:
        code (str): The source code to load.
        language (Literal["python", "typescript"] | ProgrammingLanguage): Language of
            ``code``. Strings are upper-cased before the enum lookup; enum members
            are used as given. Keyword-only and required.

    Returns:
        Codebase: The codebase returned by the factory for the single file.
    """
    logger.info("Creating codebase from string")

    # Normalise the language argument to a ProgrammingLanguage member.
    if isinstance(language, str):
        prog_lang = ProgrammingLanguage(language.upper())
    else:
        prog_lang = language

    # Only TypeScript gets a .ts file; anything else is stored as test.py.
    if prog_lang == ProgrammingLanguage.TYPESCRIPT:
        filename = "test.ts"
    else:
        filename = "test.py"

    # Scratch directory that serves as the repo path.
    import tempfile

    scratch_dir = tempfile.mkdtemp(prefix="codegen_")
    logger.info(f"Using directory: {scratch_dir}")

    # Imported here, as in the original, rather than at module level.
    from codegen.sdk.codebase.factory.codebase_factory import CodebaseFactory

    result = CodebaseFactory.get_codebase_from_files(
        repo_path=scratch_dir,
        files={filename: code},
        programming_language=prog_lang,
    )
    logger.info("Codebase initialization complete")
    return result
1349+
1350+
@classmethod
def from_files(
    cls,
    files: dict[str, str],
    *,
    language: Literal["python", "typescript"] | ProgrammingLanguage | None = None,
) -> "Codebase":
    """Build a Codebase from an in-memory mapping of filenames to contents.

    The language is inferred from the (lower-cased) file extensions: all
    ``.py`` means Python; all of ``.ts``/``.tsx``/``.js``/``.jsx`` means
    TypeScript. An explicit ``language`` is only used as a cross-check against
    the inferred one, except when ``files`` is empty, where it is used directly
    (Python if omitted). The files are handed to
    ``CodebaseFactory.get_codebase_from_files`` with a fresh temporary directory
    (prefix ``codegen_``) as the repo path; this method does not remove it.

    Args:
        files: Mapping of filename to file content, e.g. {"main.py": "print('hello')"}
        language: Optional language. Strings are upper-cased before the enum lookup.
            When files are given it must equal the language inferred from their extensions.

    Returns:
        Codebase: The codebase returned by the factory for the given files.

    Raises:
        ValueError: If the extensions do not all belong to a single supported language,
            or if ``language`` differs from the language inferred from the extensions.

    Example:
        >>> # Language inferred as Python
        >>> files = {"main.py": "print('hello')", "utils.py": "def add(a, b): return a + b"}
        >>> codebase = Codebase.from_files(files)

        >>> # Language inferred as TypeScript
        >>> files = {"index.ts": "console.log('hello')", "utils.tsx": "export const App = () => <div>Hello</div>"}
        >>> codebase = Codebase.from_files(files)
    """

    def _as_enum(value):
        # Strings are upper-cased before the enum lookup; members pass through unchanged.
        return ProgrammingLanguage(value.upper()) if isinstance(value, str) else value

    logger.info("Creating codebase from files")

    if files:
        # Distinct, case-insensitive extensions across all filenames.
        suffixes = {os.path.splitext(name)[1].lower() for name in files}

        if suffixes <= {".py"}:
            detected = ProgrammingLanguage.PYTHON
        elif suffixes <= {".ts", ".tsx", ".js", ".jsx"}:
            detected = ProgrammingLanguage.TYPESCRIPT
        else:
            msg = f"Cannot determine single language from extensions: {suffixes}. Files must all be Python (.py) or TypeScript (.ts, .tsx, .js, .jsx)"
            raise ValueError(msg)

        # An explicit language is validated only after inference succeeded.
        if language is not None:
            requested = _as_enum(language)
            if requested != detected:
                msg = f"Provided language {requested} doesn't match inferred language {detected} from file extensions"
                raise ValueError(msg)

        prog_lang = detected
        origin = "inferred" if language is None else "explicit"
        logger.info(f"Using language: {prog_lang} ({origin})")
    else:
        # Nothing to infer from: honour the explicit language, else fall back to Python.
        prog_lang = ProgrammingLanguage.PYTHON if language is None else _as_enum(language)
        logger.info(f"No files provided, using {prog_lang}")

    # Scratch directory that serves as the repo path.
    import tempfile

    scratch_dir = tempfile.mkdtemp(prefix="codegen_")
    logger.info(f"Using directory: {scratch_dir}")

    # Imported here, as in the original, rather than at module level.
    from codegen.sdk.codebase.factory.codebase_factory import CodebaseFactory

    result = CodebaseFactory.get_codebase_from_files(
        repo_path=scratch_dir,
        files=files,
        programming_language=prog_lang,
    )
    logger.info("Codebase initialization complete")
    return result
1426+
13141427
def get_modified_symbols_in_pr(self, pr_id: int) -> tuple[str, dict[str, str], list[str]]:
13151428
"""Get all modified symbols in a pull request"""
13161429
pr = self._op.get_pull_request(pr_id)
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import pytest
2+
3+
from codegen.sdk.core.codebase import Codebase
4+
5+
6+
def test_from_files_python():
    """Two ``.py`` files given without a language load as a two-file codebase."""
    sources = {
        "main.py": "from utils import add\nprint(add(1, 2))",
        "utils.py": "def add(a, b):\n return a + b",
    }
    # No ``language`` argument: it is inferred from the extensions.
    codebase = Codebase.from_files(sources)
    assert len(codebase.files) == 2
    for expected_name in ("main.py", "utils.py"):
        assert any(f.filepath.endswith(expected_name) for f in codebase.files)
    assert any("from utils import add" in f.content for f in codebase.files)
15+
16+
17+
def test_from_files_typescript():
    """Two ``.ts`` files given without a language load as a two-file codebase."""
    sources = {
        "index.ts": "import { add } from './utils';\nconsole.log(add(1, 2));",
        "utils.ts": "export function add(a: number, b: number): number {\n return a + b;\n}",
    }
    # No ``language`` argument: it is inferred from the extensions.
    codebase = Codebase.from_files(sources)
    assert len(codebase.files) == 2
    for expected_name in ("index.ts", "utils.ts"):
        assert any(f.filepath.endswith(expected_name) for f in codebase.files)
    assert any("import { add }" in f.content for f in codebase.files)
26+
27+
28+
def test_from_files_empty():
    """An empty mapping yields a codebase that contains no files."""
    # With nothing to infer from and no language given, Python is used.
    empty_codebase = Codebase.from_files({})
    assert len(empty_codebase.files) == 0
33+
34+
35+
def test_from_files_mixed_extensions():
    """Mixing ``.py`` and ``.ts`` files is rejected with a ValueError."""
    mixed_sources = {"main.py": "print('hello')", "test.ts": "console.log('world')"}
    expected_message = "Cannot determine single language from extensions"
    with pytest.raises(ValueError, match=expected_message):
        Codebase.from_files(mixed_sources)
40+
41+
42+
def test_from_files_typescript_multiple_extensions():
    """``.ts``, ``.tsx``, ``.js`` and ``.jsx`` files are accepted together."""
    sources = {}
    sources["index.ts"] = "console.log('hi')"
    sources["component.tsx"] = "export const App = () => <div>Hello</div>"
    sources["utils.js"] = "module.exports = { add: (a, b) => a + b }"
    sources["button.jsx"] = "export const Button = () => <button>Click</button>"
    # No ``language`` argument: all four extensions infer TypeScript.
    codebase = Codebase.from_files(sources)
    assert len(codebase.files) == 4
53+
54+
55+
def test_from_files_explicit_language_mismatch():
    """Passing ``language="typescript"`` for ``.py`` files raises ValueError."""
    python_sources = {"main.py": "print('hello')", "utils.py": "def add(a, b): return a + b"}
    expected_message = "Provided language.*doesn't match inferred language"
    with pytest.raises(ValueError, match=expected_message):
        Codebase.from_files(python_sources, language="typescript")
60+
61+
62+
def test_from_files_explicit_language_match():
    """An explicit language that agrees with the extensions is accepted."""
    python_sources = {"main.py": "print('hello')", "utils.py": "def add(a, b): return a + b"}
    codebase = Codebase.from_files(python_sources, language="python")
    assert len(codebase.files) == 2
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import pytest
2+
3+
from codegen.sdk.core.codebase import Codebase
4+
from codegen.shared.enums.programming_language import ProgrammingLanguage
5+
6+
7+
def test_from_string_python():
    """A Python snippet becomes a one-file codebase stored as ``test.py``."""
    snippet = """
def hello():
    return "world"
"""
    codebase = Codebase.from_string(snippet, language="python")
    assert len(codebase.files) == 1
    only_file = codebase.files[0]
    assert only_file.filepath.endswith("test.py")
    assert "def hello" in only_file.content
17+
18+
19+
def test_from_string_typescript():
    """A TypeScript snippet becomes a one-file codebase stored as ``test.ts``."""
    snippet = """
function hello(): string {
    return "world";
}
"""
    codebase = Codebase.from_string(snippet, language="typescript")
    assert len(codebase.files) == 1
    only_file = codebase.files[0]
    assert only_file.filepath.endswith("test.ts")
    assert "function hello" in only_file.content
30+
31+
32+
def test_from_string_with_enum():
    """The language may be passed as a ``ProgrammingLanguage`` member instead of a string."""
    snippet = "const x = 42;"
    codebase = Codebase.from_string(snippet, language=ProgrammingLanguage.TYPESCRIPT)
    assert len(codebase.files) == 1
    only_file = codebase.files[0]
    assert only_file.filepath.endswith("test.ts")
38+
39+
40+
def test_from_string_invalid_syntax():
    """Text that is not valid Python is still loaded, with its content kept verbatim."""
    not_python = "this is not valid python"
    codebase = Codebase.from_string(not_python, language="python")
    assert len(codebase.files) == 1
    only_file = codebase.files[0]
    assert only_file.content == not_python
46+
47+
48+
def test_from_string_empty():
    """An empty string still produces one file, whose content is empty."""
    codebase = Codebase.from_string("", language="python")
    assert len(codebase.files) == 1
    only_file = codebase.files[0]
    assert only_file.content == ""
53+
54+
55+
def test_from_string_missing_language():
    """Omitting the required ``language`` argument raises TypeError."""
    expected_message = "missing.*required.*argument.*language"
    with pytest.raises(TypeError, match=expected_message):
        Codebase.from_string("print('hello')")
59+
60+
61+
def test_from_string_invalid_language():
    """An unrecognised language string raises ValueError."""
    snippet = "print('hello')"
    with pytest.raises(ValueError):
        Codebase.from_string(snippet, language="invalid")
65+
66+
67+
def test_from_string_multifile():
    """A string that looks like several files is still loaded as a single file."""
    snippet = """
# file1.py
def hello(): pass

# file2.py
def world(): pass
"""
    # The "# fileN.py" markers are plain comments; no splitting takes place.
    codebase = Codebase.from_string(snippet, language="python")
    assert len(codebase.files) == 1

0 commit comments

Comments
 (0)