Skip to content

feat!: Finalize some of the programming_language -> language migration #490

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/api-reference/core/Codebase.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,8 @@ Fetches a codebase from GitHub and returns a Codebase instance.
defaultValue=""
/>
<Parameter
name="programming_language"
type={ <code className="text-sm bg-gray-100 px-2 py-0.5 rounded">ProgrammingLanguage | None</code> }
name="language"
type={ <code className="text-sm bg-gray-100 px-2 py-0.5 rounded">Literal["python", "typescript"] | ProgrammingLanguage | None</code> }
description="The programming language of the repo. Defaults to None."
defaultValue=""
/>
Expand Down
3 changes: 1 addition & 2 deletions src/codegen/extensions/graph/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from codegen import Codebase
from codegen.extensions.graph.create_graph import create_codebase_graph
from codegen.extensions.graph.neo4j_exporter import Neo4jExporter
from codegen.shared.enums.programming_language import ProgrammingLanguage


def visualize_codebase(codebase, neo4j_uri: str, username: str, password: str):
Expand Down Expand Up @@ -39,5 +38,5 @@ def visualize_codebase(codebase, neo4j_uri: str, username: str, password: str):

if __name__ == "__main__":
# Initialize codebase
codebase = Codebase("../../", programming_language=ProgrammingLanguage.PYTHON)
codebase = Codebase("../../", language="python")
visualize_codebase(codebase, "bolt://localhost:7687", "neo4j", "password")
5 changes: 3 additions & 2 deletions src/codegen/extensions/mcp/codebase_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@

mcp = FastMCP(
"codebase-agent-mcp",
instructions="Use this server to access any information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase. Use this tool for all questions, queries regarding your codebase.",
instructions="""Use this server to access any information from your codebase. This tool can provide information ranging from AST Symbol details and information from across the codebase.
Use this tool for all questions, queries regarding your codebase.""",
)


Expand All @@ -27,7 +28,7 @@ def query_codebase(
if not os.path.exists(codebase_dir):
return {"error": f"Codebase directory '{codebase_dir}' does not exist. Please provide a valid directory path."}
# Initialize codebase
codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language)
codebase = Codebase(repo_path=codebase_dir, language=codebase_language)

# Create the agent
agent = create_codebase_inspector_agent(codebase=codebase, model_name="gpt-4", verbose=True)
Expand Down
2 changes: 1 addition & 1 deletion src/codegen/extensions/mcp/codebase_mods.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def split_files_by_function(
):
if not os.path.exists(codebase_dir):
return {"error": f"Codebase directory '{codebase_dir}' does not exist. Please provide a valid directory path."}
codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language)
codebase = Codebase(repo_path=codebase_dir, language=codebase_language)
new_files = {}
file = codebase.get_file(target_file)
# for each test_function in the file
Expand Down
4 changes: 2 additions & 2 deletions src/codegen/extensions/mcp/codebase_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def reveal_symbol_tool(
collect_dependencies: Annotated[Optional[bool], "includes dependencies of symbol"],
collect_usages: Annotated[Optional[bool], "includes usages of symbol"],
):
codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language)
codebase = Codebase(repo_path=codebase_dir, language=codebase_language)
result = reveal_symbol(
codebase=codebase,
symbol_name=symbol_name,
Expand All @@ -45,7 +45,7 @@ def search_codebase_tool(
codebase_language: Annotated[ProgrammingLanguage, "The language the codebase is written in"],
use_regex: Annotated[bool, "use regex for the search query"],
):
codebase = Codebase(repo_path=codebase_dir, programming_language=codebase_language)
codebase = Codebase(repo_path=codebase_dir, language=codebase_language)
result = search(codebase, query, target_directories, use_regex=use_regex)
return json.dumps(result, indent=2)

Expand Down
4 changes: 2 additions & 2 deletions src/codegen/sdk/core/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
self,
repo_path: str,
*,
language: Literal["python", "typescript"] | None = None,
language: Literal["python", "typescript"] | ProgrammingLanguage | None = None,
projects: None = None,
config: CodebaseConfig = DefaultConfig,
io: IO | None = None,
Expand Down Expand Up @@ -180,13 +180,13 @@
main_project = ProjectConfig.from_path(repo_path, programming_language=ProgrammingLanguage(language.upper()) if language else None)
projects = [main_project]
else:
main_project = projects[0]

Check failure on line 183 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Value of type "list[ProjectConfig] | None" is not indexable [index]

# Initialize codebase
self._op = main_project.repo_operator
self.viz = VisualizationManager(op=self._op)
self.repo_path = Path(self._op.repo_path)
self.ctx = CodebaseContext(projects, config=config, io=io, progress=progress)

Check failure on line 189 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "CodebaseContext" has incompatible type "list[ProjectConfig] | None"; expected "list[ProjectConfig]" [arg-type]
self.console = Console(record=True, soft_wrap=True)

@noapidoc
Expand All @@ -202,7 +202,7 @@
yield "nodes", len(self.ctx.nodes)
yield "edges", len(self.ctx.edges)

__rich_repr__.angular = ANGULAR_STYLE

Check failure on line 205 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: "Callable[[Codebase[TSourceFile, TDirectory, TSymbol, TClass, TFunction, TImport, TGlobalVar, TInterface, TTypeAlias, TParameter, TCodeBlock]], Iterable[Any | tuple[Any] | tuple[str, Any] | tuple[str, Any, Any]]]" has no attribute "angular" [attr-defined]

@property
@deprecated("Please do not use the local repo operator directly")
Expand Down Expand Up @@ -244,8 +244,8 @@

@noapidoc
def _symbols(self, symbol_type: SymbolType | None = None) -> list[TSymbol | TClass | TFunction | TGlobalVar]:
matches: list[Symbol] = self.ctx.get_nodes(NodeType.SYMBOL)

Check failure on line 247 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible types in assignment (expression has type "list[Importable[Any]]", variable has type "list[Symbol[Any, Any]]") [assignment]
return [x for x in matches if x.is_top_level and (symbol_type is None or x.symbol_type == symbol_type)]

Check failure on line 248 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: List comprehension has incompatible type List[Symbol[Any, Any]]; expected List[TSymbol | TClass | TFunction | TGlobalVar] [misc]

# =====[ Node Types ]=====
@overload
Expand All @@ -254,7 +254,7 @@
def files(self, *, extensions: Literal["*"]) -> list[File]: ...
@overload
def files(self, *, extensions: None = ...) -> list[TSourceFile]: ...
@proxy_property

Check failure on line 257 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: "cached_property[ProxyProperty[[Codebase[TSourceFile, TDirectory, TSymbol, TClass, TFunction, TImport, TGlobalVar, TInterface, TTypeAlias, TParameter, TCodeBlock], DefaultNamedArg(list[str] | Literal['*'] | None, 'extensions')], list[TSourceFile] | list[File]]]" not callable [operator]
def files(self, *, extensions: list[str] | Literal["*"] | None = None) -> list[TSourceFile] | list[File]:
"""A list property that returns all files in the codebase.

Expand Down Expand Up @@ -284,15 +284,15 @@
return sort_editables(files, alphabetical=True, dedupe=False)

@cached_property
def codeowners(self) -> list["CodeOwner[TSourceFile]"]:

Check failure on line 287 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: "CodeOwner" expects 7 type arguments, but 1 given [type-arg]
"""List all CodeOwners in the codebase.

Returns:
list[CodeOwners]: A list of CodeOwners objects in the codebase.
"""
if self.G.codeowners_parser is None:

Check failure on line 293 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: "Codebase[TSourceFile, TDirectory, TSymbol, TClass, TFunction, TImport, TGlobalVar, TInterface, TTypeAlias, TParameter, TCodeBlock]" has no attribute "G" [attr-defined]
return []
return CodeOwner.from_parser(self.G.codeowners_parser, lambda *args, **kwargs: self.files(*args, **kwargs))

Check failure on line 295 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: "Codebase[TSourceFile, TDirectory, TSymbol, TClass, TFunction, TImport, TGlobalVar, TInterface, TTypeAlias, TParameter, TCodeBlock]" has no attribute "G" [attr-defined]

@property
def directories(self) -> list[TDirectory]:
Expand All @@ -304,7 +304,7 @@
Returns:
list[TDirectory]: A list of Directory objects in the codebase.
"""
return list(self.ctx.directories.values())

Check failure on line 307 in src/codegen/sdk/core/codebase.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "list" has incompatible type "dict_values[Path, Directory[Any, Any, Any, Any, Any, Any, Any]]"; expected "Iterable[TDirectory]" [arg-type]

@property
def imports(self) -> list[TImport]:
Expand Down Expand Up @@ -1251,7 +1251,7 @@
tmp_dir (Optional[str]): The directory to clone the repo into. Defaults to /tmp/codegen
commit (Optional[str]): The specific commit hash to clone. Defaults to HEAD
shallow (bool): Whether to do a shallow clone. Defaults to True
language (Literal["python", "typescript"] | None): The programming language of the repo. Defaults to None.
language (Literal["python", "typescript"] | ProgrammingLanguage | None): The programming language of the repo. Defaults to None.
config (CodebaseConfig): Configuration for the codebase. Defaults to DefaultConfig.

Returns:
Expand Down
18 changes: 8 additions & 10 deletions src/codegen/sdk/system-prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,6 @@ Construct a Codebase by passing in a path to a local `git` repository or any sub

```python
from codegen import Codebase
from codegen.shared.enums.programming_language import ProgrammingLanguage

# Parse from a git repository root
codebase = Codebase("path/to/repository")
Expand All @@ -1362,12 +1361,12 @@ codebase = Codebase("path/to/repository/src/subfolder")
codebase = Codebase("./")

# Specify programming language (instead of inferring from file extensions)
codebase = Codebase("./", programming_language=ProgrammingLanguage.TYPESCRIPT)
codebase = Codebase("./", language="typescript")
```

<Note>
By default, Codegen will automatically infer the programming language of the codebase and
parse all files in the codebase. You can override this by passing the `programming_language` parameter
parse all files in the codebase. You can override this by passing the `language` parameter
with either a language string (`"python"` or `"typescript"`) or a value from the `ProgrammingLanguage` enum.
</Note>

Expand All @@ -1383,7 +1382,6 @@ To fetch and parse a repository directly from GitHub, use the `from_repo` functi

```python
import codegen
from codegen.shared.enums.programming_language import ProgrammingLanguage

# Fetch and parse a repository (defaults to /tmp/codegen/{repo_name})
codebase = codegen.from_repo('fastapi/fastapi')
Expand All @@ -1394,7 +1392,7 @@ codebase = codegen.from_repo(
tmp_dir='/custom/temp/dir', # Optional: custom temp directory
commit='786a8ada7ed0c7f9d8b04d49f24596865e4b7901', # Optional: specific commit
shallow=False, # Optional: full clone instead of shallow
programming_language=ProgrammingLanguage.PYTHON # Optional: override language detection
language="python" # Optional: override language detection
)
```

Expand Down Expand Up @@ -1446,7 +1444,6 @@ from codegen import Codebase
from codegen.git.repo_operator.local_repo_operator import LocalRepoOperator
from codegen.git.schemas.repo_config import BaseRepoConfig
from codegen.sdk.codebase.config import ProjectConfig
from codegen.shared.enums.programming_language import ProgrammingLanguage

codebase = Codebase(
projects = [
Expand All @@ -1456,7 +1453,7 @@ codebase = Codebase(
repo_config=BaseRepoConfig(),
bot_commit=True
),
programming_language=ProgrammingLanguage.TYPESCRIPT,
language="typescript",
base_path="src/codegen/sdk/typescript",
subdirectories=["src/codegen/sdk/typescript"]
)
Expand Down Expand Up @@ -6606,14 +6603,13 @@ First, we initialize the codebase and create a vector index for semantic search:
```python
from codegen import Codebase
from codegen.extensions import VectorIndex
from codegen.shared.enums.programming_language import ProgrammingLanguage

def initialize_codebase():
"""Initialize and index the codebase."""
# Initialize codebase with smart caching
codebase = Codebase.from_repo(
"codegen-sh/codegen-sdk",
programming_language=ProgrammingLanguage.PYTHON,
language="python",
tmp_dir="/root"
)

Expand Down Expand Up @@ -6795,6 +6791,7 @@ While this example demonstrates a simple RAG-based bot, you can extend it to bui
</Card>
</CardGroup>


---
title: "Mining Training Data for LLMs"
sidebarTitle: "Mining Data"
Expand Down Expand Up @@ -10651,7 +10648,7 @@ from codegen.extensions.tools.semantic_search import semantic_search

@mcp.tool('codebase_semantic_search', "search codebase with the provided query")
def search(query: Annotated[str, "search query to run against codebase"]):
codebase = Codebase("provide location to codebase", programming_language="provide codebase Language")
codebase = Codebase("provide location to codebase", language="provide codebase Language")
# use the semantic search tool from codegen.extensions.tools OR write your own
results = semantic_search(codebase=codebase, query=query)
return results
Expand All @@ -10672,6 +10669,7 @@ If you'd like to integrate this into an IDE check out this [setup guide](/int
And that's a wrap! Chime in on our [community
Slack](https://community.codegen.com) if you have questions or ideas for additional MCP tools/capabilities.


---
title: "Neo4j Graph"
sidebarTitle: "Neo4j Graph"
Expand Down