Default to CUDA if available (#431)

malfet · malfet · commit 615954afbb70 · 2024-07-16T22:43:51.000-07:00
Unless the user specifies a device, default to CUDA when it is available on the platform.

For all models other than TinyLlama, inference on GPU will be faster than on CPU.
diff --git a/build/builder.py b/build/builder.py
@@ -32,13 +32,16 @@ class BuilderArgs:
     gguf_kwargs: Optional[Dict[str, Any]] = None
     dso_path: Optional[Union[Path, str]] = None
     pte_path: Optional[Union[Path, str]] = None
-    device: str = "cpu"
+    device: Optional[str] = None
     precision: torch.dtype = torch.float32
     setup_caches: bool = False
     use_tp: bool = False
     is_chat_model: bool = False
 
     def __post_init__(self):
+        if self.device is None:
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            
         if not (
             (self.checkpoint_path and self.checkpoint_path.is_file())
             or (self.checkpoint_dir and self.checkpoint_dir.is_dir())