codegen-sh · jemeza-codegen · Feb 28, 2025 · Feb 28, 2025 · Feb 28, 2025 · Feb 28, 2025
@@ -77,6 +77,7 @@ dependencies = [
   "urllib3>=2.0.0",
   "datasets",
   "colorlog>=6.9.0",
+  "langsmith",
 ]
 
 license = { text = "Apache-2.0" }

@@ -7,9 +7,6 @@
 
 from datasets import load_dataset
 
-# Add constant for cache directory
-CACHE_DIR = Path.home() / ".cache" / "swebench"
-
 
 class SWEBenchDataset(Enum):
     LITE = "princeton-nlp/SWE-bench_Lite"
@@ -68,35 +65,38 @@ def load_predictions(paths):
 
 
 def get_swe_bench_examples(
-    dataset: SWEBenchDataset = SWEBenchDataset.LITE,
+    dataset: Literal["lite", "full", "verified"] = "lite",
     split: Literal["train", "dev", "test"] = "test",
     offset: int = 0,
     length: int = 100,
     instance_id: str | None = None,
+    repo: str | None = None,
 ) -> list[SweBenchExample]:
     """Fetch examples from the SWE-bench dataset using the datasets library.
 
     Args:
-        dataset: The dataset to use (LITE, FULL, or VERIFIED)
+        dataset: The dataset to use ("lite", "full", or "verified")
         split: The dataset split to use
         offset: Starting index for examples
         length: Number of examples to fetch
+        instance_id: Optional specific instance ID to fetch
 
     Returns:
         List of SweBenchExample objects
     """
-    # Ensure cache directory exists
-    CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    # Convert string dataset name to enum
+    dataset_map = {"lite": SWEBenchDataset.LITE, "full": SWEBenchDataset.FULL, "verified": SWEBenchDataset.VERIFIED}
+    dataset_enum = dataset_map[dataset.lower()]
 
     # Load the dataset with caching enabled
-    dataset_name = dataset.value
-    swe_bench_dataset = load_dataset(dataset_name, cache_dir=str(CACHE_DIR), download_mode="reuse_dataset_if_exists")
+    dataset_name = dataset_enum.value
+    swe_bench_dataset = load_dataset(dataset_name, download_mode="reuse_dataset_if_exists")
 
     # Get the requested split
     split_data = swe_bench_dataset[split]
 
     # Apply offset and length
-    if instance_id:
+    if instance_id or repo:
         offset = 0
         end_idx = len(split_data)
     else:
@@ -113,6 +113,8 @@ def get_swe_bench_examples(
     for row in selected_rows:
         if instance_id and row["instance_id"] != instance_id:
             continue
+        if repo and row["repo"] != repo:
+            continue
         example = SweBenchExample(
             repo=row["repo"],
             instance_id=row["instance_id"],
@@ -129,4 +131,4 @@ def get_swe_bench_examples(
         )
         examples.append(example)
 
-    return examples
+    return examples[:length]