Skip to content

add copy_ dispatch and some tests #45

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion test/modules/test_nf4_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from torch.testing._internal.common_utils import TestCase
from torchao.dtypes.nf4tensor import linear_nf4, NF4Tensor
import torch.nn.functional as F

import io
from collections import OrderedDict

bnb_available = False

Expand Down Expand Up @@ -44,6 +45,16 @@ def _build_bnb_linear(input_weight, device):


class TestNF4Linear(TestCase):
class TestMod(nn.Module):
    """Minimal module whose single parameter ``param`` wraps an ``NF4Tensor``."""

    def __init__(self, tensor, block_size, scaler_block_size):
        super().__init__()
        # Build the NF4 tensor first, then register it as the module parameter.
        nf4_weight = NF4Tensor.from_tensor(tensor, block_size, scaler_block_size)
        self.param = torch.nn.Parameter(nf4_weight)

def save_state_dict_to_buffer(self, state_dict: OrderedDict):
    """Serialize ``state_dict`` with ``torch.save`` into an in-memory stream.

    The returned ``io.BytesIO`` is rewound to position 0 so it can be passed
    directly to ``torch.load``.
    """
    stream = io.BytesIO()
    torch.save(state_dict, stream)
    # Rewind so a subsequent reader starts at the beginning of the payload.
    stream.seek(0)
    return stream

def test_register_nf4_as_param(self):
nf4_tensor = NF4Tensor.from_tensor(
Expand Down Expand Up @@ -121,6 +132,43 @@ def test_nf4_bnb_linear(self):
assert err_native < 0.5 * dim
assert err_bnb < 0.5 * dim

@unittest.skipIf(not torch.cuda.is_available(), "Need cuda for test")
def test_load_from_bfloat16(self):
    """Loading a plain bfloat16 tensor must keep the module's NF4 block sizes."""
    bf16_weight = torch.rand(64, device='cuda', dtype=torch.bfloat16)
    module = self.TestMod(bf16_weight, 32, 2)

    # The incoming state dict carries an ordinary bfloat16 tensor, not an
    # NF4Tensor, under the module's "param" key.
    plain_state = {"param": bf16_weight}
    module.load_state_dict(plain_state)

    loaded_param = module.param
    assert loaded_param.block_size == 32
    assert loaded_param.scaler_block_size == 2

@unittest.skipIf(not torch.cuda.is_available(), "Need cuda for test")
def test_load_from_nf4_same_meta(self):
    """Round-trip a saved NF4 state dict into a module built with the same block sizes."""
    weight = torch.rand(64, device='cuda', dtype=torch.bfloat16)

    # Serialize the source module's state dict to an in-memory buffer.
    source_mod = self.TestMod(weight, 32, 2)
    serialized = self.save_state_dict_to_buffer(source_mod.state_dict())

    # Load it back into a second module that uses identical metadata.
    target_mod = self.TestMod(weight, 32, 2)
    restored_state = torch.load(serialized)
    target_mod.load_state_dict(restored_state)

    target_param = target_mod.param
    assert target_param.block_size == 32
    assert target_param.scaler_block_size == 2

@unittest.skipIf(not torch.cuda.is_available(), "Need cuda for test")
def test_load_from_nf4_diff_meta(self):
    """Load a saved NF4 state dict into a module built with different block sizes.

    The receiving module must keep its own ``block_size`` and
    ``scaler_block_size`` rather than adopting those of the saved tensor.
    """
    weight = torch.rand(128, device='cuda', dtype=torch.bfloat16)

    # Saved with block_size=32 / scaler_block_size=2 ...
    source_mod = self.TestMod(weight, 32, 2)
    serialized = self.save_state_dict_to_buffer(source_mod.state_dict())

    # ... and loaded into a module configured with 64 / 1.
    target_mod = self.TestMod(weight, 64, 1)
    restored_state = torch.load(serialized)
    target_mod.load_state_dict(restored_state)

    target_param = target_mod.param
    assert target_param.block_size == 64
    assert target_param.scaler_block_size == 1

if __name__ == "__main__":
    # Run the test suite when this file is executed directly as a script.
    unittest.main()
36 changes: 36 additions & 0 deletions torchao/dtypes/nf4tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@
NF4_OPS_TABLE: Dict[Any, Any] = {}


def same_metadata(a: "NF4Tensor", b: "NF4Tensor"):
    """Return True only when ``a`` and ``b`` are both NF4Tensors with equal layout.

    Layout equality means matching ``block_size``, ``scaler_block_size`` and
    ``n_blocks``. If either argument is not an ``NF4Tensor`` the result is
    False and none of those attributes are read.
    """
    if not (isinstance(a, NF4Tensor) and isinstance(b, NF4Tensor)):
        return False
    if a.block_size != b.block_size:
        return False
    if a.scaler_block_size != b.scaler_block_size:
        return False
    return a.n_blocks == b.n_blocks

def implements(aten_ops):
"""Use this decorator to implement a function for an aten op in __torch_dispatch__"""
Expand All @@ -29,6 +37,34 @@ def noop_detach(func, *args, **kwargs):
return args[0][0]


@implements(
    [
        aten.copy_.default,
    ]
)
def copy_(func, *args, **kwargs):
    """Implement ``aten.copy_`` for an ``NF4Tensor`` destination.

    ``args[0][0]`` is the destination ``NF4Tensor`` and ``args[0][1]`` is the
    tensor being copied in. A source that does not already share the
    destination's metadata (see ``same_metadata``) is first re-quantized with
    the destination's ``block_size`` and ``scaler_block_size``; the inner
    tensors named by ``__tensor_flatten__`` are then copied in place.

    Returns:
        The destination tensor, as is conventional for an in-place op.

    Raises:
        RuntimeError: if the source still does not match the destination's
            metadata after re-quantization.
    """
    original: NF4Tensor = args[0][0]
    copy_in: torch.Tensor = args[0][1]

    if not same_metadata(original, copy_in):
        if isinstance(copy_in, NF4Tensor):
            # Source is an NF4Tensor with different metadata: go through its
            # full-precision form before re-quantizing (presumably a
            # dequantize -- confirm against get_original_weight).
            copy_in = copy_in.get_original_weight()
        # Quantize the (now non-NF4) source with the destination's block sizes.
        copy_in = NF4Tensor.from_tensor(
            copy_in, original.block_size, original.scaler_block_size
        )
        # Fail loudly rather than re-dispatching: a mismatch that survives
        # re-quantization (e.g. a different n_blocks) cannot be resolved by
        # converting again and would otherwise recurse without end.
        if not same_metadata(original, copy_in):
            raise RuntimeError(
                "Cannot copy_ into NF4Tensor: source metadata does not match "
                "the destination after conversion to NF4"
            )

    # Metadata matches: copy each inner tensor of the source into the
    # corresponding inner tensor of the destination.
    for tensor_name in original.__tensor_flatten__()[0]:
        getattr(original, tensor_name).copy_(getattr(copy_in, tensor_name))
    return original

@dataclass
class SubclassTensorArgs:
original_shape: torch.Size
Expand Down