@@ -90,7 +90,7 @@ class AdaptiveGPTQ:
     perm_cpu: torch.tensor = None
     invperm: torch.tensor = None
 
-    g_idx: torch.tensor = None
+    # g_idx: torch.tensor = None
     scale: torch.tensor = None
     qscale: torch.tensor = None
     qscale_max: torch.tensor = None
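Context note (not part of the diff): in act-order GPTQ, g_idx maps each original weight column to its quantization group under the activation-order permutation. Because the class still keeps perm and invperm, the tensor can be rebuilt on demand rather than stored. A minimal sketch of that relationship, assuming a perm tensor and a fixed group_size (both names illustrative, not from this commit):

# Sketch: how g_idx relates to perm and group size in act-order GPTQ.
import torch

def make_g_idx(perm: torch.Tensor, group_size: int) -> torch.Tensor:
    # In permuted order, the group of position i is simply i // group_size;
    # mapping back through the inverse permutation gives the group of each
    # column in its original position.
    invperm = torch.argsort(perm)
    groups_permuted = torch.arange(perm.numel(), dtype = torch.int32) // group_size
    return groups_permuted[invperm]

perm = torch.tensor([2, 0, 3, 1])        # example activation-order permutation
print(make_g_idx(perm, group_size = 2))  # tensor([0, 1, 0, 1], dtype=torch.int32)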
@@ -125,7 +125,7 @@ def drop_buffers(self):
         self.perm = None
         self.perm_cpu = None
         self.invperm = None
-        self.g_idx = None
+        # self.g_idx = None
         self.scale = None
         self.qscale = None
         self.qscale_max = None
@@ -389,10 +389,10 @@ def quantize(self, keep_qweight = False, apply = False, drop = False):
         # Create g_idx to store inverse activation order
 
         # self.g_idx = torch.tensor(group_idx_list, dtype = torch.int32, device = self.device)
-        self.g_idx = torch.tensor(group_idx_list, dtype = torch.int32)
+        # self.g_idx = torch.tensor(group_idx_list, dtype = torch.int32)
 
-        self.invperm = torch.argsort(self.perm_cpu)
-        self.g_idx = self.g_idx[self.invperm]
+        self.invperm = torch.argsort(self.perm)
+        # self.g_idx = self.g_idx[self.invperm]
 
         # Store scales
 
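Note on the hunk above: torch.argsort of a permutation yields its inverse permutation, so switching the argument from self.perm_cpu to self.perm only moves the computation onto the tensor's device; the identity itself is device-independent. A quick self-contained check:

# Sketch: argsort of a permutation is its inverse, so x[perm][invperm] == x.
import torch

perm = torch.randperm(8)         # some activation-order permutation
invperm = torch.argsort(perm)    # inverse permutation via argsort
x = torch.randn(8)
assert torch.equal(x[perm][invperm], x)  # permute, then undo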
@@ -449,7 +449,7 @@ def apply_temp(self):
 
     def pack(self, key, qparams):
 
-        self.qgroups = self.qgroups.to("cude:0")
+        self.qgroups = self.qgroups.to("cuda:0")
         # self.qscale_max = self.qscale_max.to("cude:0")
 
         assert qparams.scale_bits in [4]
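For reference, the "cude:0" typo fixed above is not a silent no-op: torch rejects unknown device strings when parsing them, so pack() would raise as soon as it ran. A minimal reproduction:

# Sketch: an invalid device string raises a RuntimeError at parse time.
import torch

t = torch.zeros(4)
try:
    t.to("cude:0")              # invalid device type in the string
except RuntimeError as err:
    print("rejected:", err)     # "Expected one of cpu, cuda, ..."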