Skip to content

Commit 2834802

Browse files
author
tomcodgen
authored
Demo for codebase statistics (#726)
1 parent d3a0d78 commit 2834802

File tree

10 files changed

+2179
-380
lines changed

10 files changed

+2179
-380
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import sys
2+
import traceback
3+
import os
4+
5+
from codegen import Codebase
6+
from codegen.extensions.attribution.cli import run
7+
from codegen.git.repo_operator.repo_operator import RepoOperator
8+
from codegen.git.schemas.repo_config import RepoConfig
9+
from codegen.sdk.codebase.config import ProjectConfig
10+
from codegen.shared.enums.programming_language import ProgrammingLanguage
11+
12+
def find_git_root(start_dir):
    """Return the closest directory at or above *start_dir* that contains ``.git``.

    The walk goes upward one parent at a time and includes the filesystem
    root itself, so a repository rooted at the top of the filesystem is
    still detected.

    Args:
        start_dir: Directory where the upward search begins.

    Returns:
        The absolute path of the first directory containing a ``.git`` entry,
        or ``None`` when no such directory exists up to the root.
    """
    current_dir = os.path.abspath(start_dir)
    while True:
        if os.path.exists(os.path.join(current_dir, ".git")):
            return current_dir
        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # dirname() of the root is the root itself: nothing left to check.
            return None
        current_dir = parent_dir


if __name__ == "__main__":
    try:
        print("Initializing codebase...")

        working_dir = os.getcwd()
        if os.path.exists(os.path.join(working_dir, ".git")):
            # Option A: Use current directory if it's a git repository
            print("Using current directory as repository...")
            repo_path = working_dir
        else:
            # Option B: Try to find a git repository in parent directories
            print("Searching for git repository in parent directories...")
            repo_path = find_git_root(working_dir)
            if repo_path is not None:
                print(f"Found git repository at {repo_path}")

        if repo_path is not None:
            # Create a repo operator for the located repository, then
            # initialize the codebase with a project config.
            repo_config = RepoConfig.from_repo_path(repo_path)
            repo_operator = RepoOperator(repo_config=repo_config)
            project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON)
            codebase = Codebase(projects=[project])
        else:
            # Option C: Use from_repo method which handles cloning
            print("No local git repository found. Cloning a repository...")
            codebase = Codebase.from_repo(repo_full_name="codegen-sh/codegen", language="python")

        print(f"Codebase loaded with {len(codebase.files)} files and {len(codebase.symbols)} symbols")

        # Run the analysis
        run(codebase)

    except Exception as e:
        # Top-level boundary of a demo script: report the failure with its
        # traceback and exit with a non-zero status.
        print(f"\n❌ Error: {str(e)}")
        print("\nTraceback:")
        traceback.print_exc()
        sys.exit(1)
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import os
2+
import sys
3+
4+
from codegen import Codebase
5+
from codegen.extensions.attribution.cli import run
6+
from codegen.extensions.attribution.main import add_attribution_to_symbols
7+
from codegen.git.repo_operator.repo_operator import RepoOperator
8+
from codegen.git.schemas.repo_config import RepoConfig
9+
from codegen.sdk.codebase.config import ProjectConfig
10+
from codegen.shared.enums.programming_language import ProgrammingLanguage
11+
12+
13+
def print_symbol_attribution(codebase):
    """Show attribution details for the ten most-used symbols of *codebase*.

    Attribution data is first attached to the symbols via
    ``add_attribution_to_symbols``; the symbols that have at least one usage
    are then ranked by usage count and, for each of the top ten, the name,
    type, file, usage count, last editor, editor history (first five entries)
    and AI-authored flag are printed. Attributes that are missing on a symbol
    are reported as "Not available".
    """
    print("\n🔍 Symbol Attribution Examples:")

    # Attach attribution information to the symbols before reading it back.
    bot_authors = ["devin[bot]", "codegen[bot]", "github-actions[bot]"]
    add_attribution_to_symbols(codebase, bot_authors)

    # Pair every symbol that is actually used with its usage count.
    ranked = [
        (sym, len(sym.usages))
        for sym in codebase.symbols
        if hasattr(sym, "usages") and len(sym.usages) > 0
    ]
    # Most-used symbols first.
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    for rank, (sym, n_usages) in enumerate(ranked[:10], start=1):
        print(f"\n📊 Symbol #{rank}: {sym.name} ({type(sym).__name__})")
        print(f" • File: {sym.filepath}")
        print(f" • Usages: {n_usages}")

        # Last editor
        last_editor = sym.last_editor if hasattr(sym, "last_editor") else "Not available"
        print(f" • Last editor: {last_editor}")

        # Editor history: list at most five names, then summarize the remainder.
        if hasattr(sym, "editor_history") and sym.editor_history:
            editors = sym.editor_history
            shown = ", ".join(editors[:5])
            overflow = len(editors) - 5
            suffix = f" and {overflow} more..." if overflow > 0 else ""
            print(f" • Editor history: {shown}{suffix}")
        else:
            print(" • Editor history: Not available")

        # AI-authored flag
        if hasattr(sym, "is_ai_authored"):
            ai_label = "Yes" if sym.is_ai_authored else "No"
        else:
            ai_label = "Not available"
        print(f" • AI authored: {ai_label}")
54+
55+
56+
if __name__ == "__main__":
    try:
        print("Initializing codebase...")

        if not os.path.exists(".git"):
            # Not inside a git checkout: use from_repo on a well-known repository.
            print("Using a sample repository...")
            codebase = Codebase.from_repo(repo_full_name="codegen-sh/codegen", language="python")
        else:
            # The current directory is a git repository: analyze it in place.
            print("Using current directory as repository...")
            operator = RepoOperator(repo_config=RepoConfig.from_repo_path(os.getcwd()))
            python_project = ProjectConfig.from_repo_operator(
                repo_operator=operator,
                programming_language=ProgrammingLanguage.PYTHON,
            )
            codebase = Codebase(projects=[python_project])

        file_count = len(codebase.files)
        symbol_count = len(codebase.symbols)
        print(f"Codebase loaded with {file_count} files and {symbol_count} symbols")

        # Gather attribution data first ...
        print("\n🔍 Running AI impact analysis...")
        run(codebase)

        # ... then show examples of reading it back from individual symbols.
        print_symbol_attribution(codebase)

    except Exception as err:
        # Top-level boundary of a demo script: report, dump the traceback, exit non-zero.
        print(f"\n❌ Error: {err!s}")
        import traceback

        traceback.print_exc()
        sys.exit(1)

0 commit comments

Comments
 (0)