@@ -1311,6 +1311,119 @@ def from_repo(
1311
1311
logger .exception (f"Failed to initialize codebase: { e } " )
1312
1312
raise
1313
1313
1314
@classmethod
def from_string(
    cls,
    code: str,
    *,
    language: Literal["python", "typescript"] | ProgrammingLanguage,
) -> "Codebase":
    """Creates a Codebase instance from a string of code.

    The code is stored as a single file (``test.ts`` when the language is
    TypeScript, ``test.py`` otherwise) and passed to ``CodebaseFactory``
    together with a freshly created temporary directory that serves as the
    repository path. The directory is left in place on success; it is only
    removed again when building the codebase fails.

    Args:
        code (str): The source code string
        language (Literal["python", "typescript"] | ProgrammingLanguage): The programming language of the code.
            A string is upper-cased and passed to the ``ProgrammingLanguage`` constructor.

    Returns:
        Codebase: A Codebase instance initialized with the provided code
    """
    logger.info("Creating codebase from string")

    # Determine language and filename
    prog_lang = ProgrammingLanguage(language.upper()) if isinstance(language, str) else language
    filename = "test.ts" if prog_lang == ProgrammingLanguage.TYPESCRIPT else "test.py"

    # Function-scope imports (kept local as in the original). They are resolved
    # before the temporary directory is created so that an import failure
    # cannot leave an orphaned directory behind.
    import shutil
    import tempfile

    from codegen.sdk.codebase.factory.codebase_factory import CodebaseFactory

    # Create temporary directory
    tmp_dir = tempfile.mkdtemp(prefix="codegen_")
    logger.info(f"Using directory: {tmp_dir}")

    # Create codebase using factory
    try:
        codebase = CodebaseFactory.get_codebase_from_files(
            repo_path=tmp_dir,
            files={filename: code},
            programming_language=prog_lang,
        )
    except Exception:
        # Nothing holds a reference to tmp_dir once the factory has failed,
        # so remove it here instead of leaking it; the error still propagates.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise

    logger.info("Codebase initialization complete")
    return codebase
1349
+
1350
@classmethod
def from_files(
    cls,
    files: dict[str, str],
    *,
    language: Literal["python", "typescript"] | ProgrammingLanguage | None = None,
) -> "Codebase":
    """Creates a Codebase instance from multiple files.

    The files are passed to ``CodebaseFactory`` together with a freshly
    created temporary directory that serves as the repository path. The
    directory is left in place on success; it is only removed again when
    building the codebase fails.

    Args:
        files: Dictionary mapping filenames to their content, e.g. {"main.py": "print('hello')"}
        language: Optional explicit language. If not provided, it is inferred from the file
            extensions (compared case-insensitively). When files are given, an explicit
            language is only validated against the inferred one; it does not override it.
            When ``files`` is empty, the explicit language is used, defaulting to Python.
            All files must have extensions matching the same language.

    Returns:
        Codebase: A Codebase instance initialized with the provided files

    Raises:
        ValueError: If file extensions don't match a single language or if explicitly provided
            language doesn't match the extensions

    Example:
        >>> # Language inferred as Python
        >>> files = {"main.py": "print('hello')", "utils.py": "def add(a, b): return a + b"}
        >>> codebase = Codebase.from_files(files)

        >>> # Language inferred as TypeScript
        >>> files = {"index.ts": "console.log('hello')", "utils.tsx": "export const App = () => <div>Hello</div>"}
        >>> codebase = Codebase.from_files(files)
    """
    logger.info("Creating codebase from files")

    if not files:
        # Nothing to infer from: use the explicit language, or default to Python
        if language is None:
            prog_lang = ProgrammingLanguage.PYTHON
        elif isinstance(language, str):
            prog_lang = ProgrammingLanguage(language.upper())
        else:
            prog_lang = language
        logger.info(f"No files provided, using {prog_lang}")
    else:
        # Map extensions to languages
        py_extensions = {".py"}
        ts_extensions = {".ts", ".tsx", ".js", ".jsx"}

        # Get unique extensions from files
        extensions = {os.path.splitext(f)[1].lower() for f in files}

        # Determine language from extensions: every extension must belong to
        # one language's set (subset test, equivalent to an all(...) check).
        if extensions <= py_extensions:
            inferred_lang = ProgrammingLanguage.PYTHON
        elif extensions <= ts_extensions:
            inferred_lang = ProgrammingLanguage.TYPESCRIPT
        else:
            msg = f"Cannot determine single language from extensions: {extensions}. Files must all be Python (.py) or TypeScript (.ts, .tsx, .js, .jsx)"
            raise ValueError(msg)

        # If language was explicitly provided, verify it matches inferred language
        if language is not None:
            explicit_lang = ProgrammingLanguage(language.upper()) if isinstance(language, str) else language
            if explicit_lang != inferred_lang:
                msg = f"Provided language {explicit_lang} doesn't match inferred language {inferred_lang} from file extensions"
                raise ValueError(msg)

        prog_lang = inferred_lang
        logger.info(f"Using language: {prog_lang} ({'inferred' if language is None else 'explicit'})")

    # Function-scope imports (kept local as in the original). They are resolved
    # before the temporary directory is created so that an import failure
    # cannot leave an orphaned directory behind.
    import shutil
    import tempfile

    from codegen.sdk.codebase.factory.codebase_factory import CodebaseFactory

    # Create temporary directory
    tmp_dir = tempfile.mkdtemp(prefix="codegen_")
    logger.info(f"Using directory: {tmp_dir}")

    # Create codebase using factory
    try:
        codebase = CodebaseFactory.get_codebase_from_files(
            repo_path=tmp_dir,
            files=files,
            programming_language=prog_lang,
        )
    except Exception:
        # Nothing holds a reference to tmp_dir once the factory has failed,
        # so remove it here instead of leaking it; the error still propagates.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise

    logger.info("Codebase initialization complete")
    return codebase
1426
+
1314
1427
def get_modified_symbols_in_pr (self , pr_id : int ) -> tuple [str , dict [str , str ], list [str ]]:
1315
1428
"""Get all modified symbols in a pull request"""
1316
1429
pr = self ._op .get_pull_request (pr_id )
0 commit comments