File tree
6 files changed
+39
-39
lines changed- backends/xnnpack
- operators
- utils
- examples/models
- llama
- source_transformation
- phi_4_mini
- third-party
6 files changed
+39
-39
lines changedLines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
185 | 185 |
| |
186 | 186 |
| |
187 | 187 |
| |
188 |
| - | |
| 188 | + | |
189 | 189 |
| |
190 | 190 |
| |
191 | 191 |
| |
192 |
| - | |
193 |
| - | |
194 |
| - | |
| 192 | + | |
195 | 193 |
| |
196 | 194 |
| |
197 | 195 |
| |
| |||
202 | 200 |
| |
203 | 201 |
| |
204 | 202 |
| |
205 |
| - | |
206 |
| - | |
207 |
| - | |
208 |
| - | |
209 |
| - | |
| 203 | + | |
210 | 204 |
| |
211 | 205 |
| |
212 | 206 |
| |
|
Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
58 | 58 |
| |
59 | 59 |
| |
60 | 60 |
| |
61 |
| - | |
| 61 | + | |
| 62 | + | |
62 | 63 |
| |
63 |
| - | |
64 |
| - | |
65 |
| - | |
66 |
| - | |
| 64 | + | |
| 65 | + | |
67 | 66 |
| |
68 | 67 |
| |
69 |
| - | |
70 |
| - | |
71 |
| - | |
72 |
| - | |
| 68 | + | |
| 69 | + | |
73 | 70 |
| |
74 |
| - | |
75 |
| - | |
76 |
| - | |
77 |
| - | |
78 |
| - | |
| 71 | + | |
| 72 | + | |
79 | 73 |
| |
80 | 74 |
| |
81 | 75 |
| |
| |||
229 | 223 |
| |
230 | 224 |
| |
231 | 225 |
| |
232 |
| - | |
| 226 | + | |
233 | 227 |
| |
234 | 228 |
| |
235 | 229 |
| |
|
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
416 | 416 |
| |
417 | 417 |
| |
418 | 418 |
| |
419 |
| - | |
| 419 | + | |
420 | 420 |
| |
421 | 421 |
| |
422 | 422 |
| |
|
Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
112 | 112 |
| |
113 | 113 |
| |
114 | 114 |
| |
115 |
| - | |
116 |
| - | |
117 |
| - | |
| 115 | + | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
118 | 122 |
| |
119 | 123 |
| |
120 | 124 |
| |
| |||
124 | 128 |
| |
125 | 129 |
| |
126 | 130 |
| |
127 |
| - | |
128 |
| - | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
129 | 136 |
| |
130 | 137 |
| |
131 | 138 |
| |
| |||
777 | 784 |
| |
778 | 785 |
| |
779 | 786 |
| |
780 |
| - | |
| 787 | + | |
| 788 | + | |
781 | 789 |
| |
782 | 790 |
| |
783 | 791 |
| |
784 | 792 |
| |
785 |
| - | |
| 793 | + | |
786 | 794 |
| |
787 |
| - | |
| 795 | + | |
788 | 796 |
| |
789 | 797 |
| |
790 | 798 |
| |
791 | 799 |
| |
792 | 800 |
| |
793 | 801 |
| |
794 |
| - | |
| 802 | + | |
795 | 803 |
| |
796 |
| - | |
| 804 | + | |
| 805 | + | |
| 806 | + | |
| 807 | + | |
797 | 808 |
| |
798 | 809 |
| |
799 | 810 |
| |
800 | 811 |
| |
801 | 812 |
| |
802 | 813 |
| |
803 | 814 |
| |
804 |
| - | |
| 815 | + | |
805 | 816 |
| |
806 | 817 |
| |
807 | 818 |
| |
808 | 819 |
| |
809 | 820 |
| |
810 | 821 |
| |
811 |
| - | |
| 822 | + | |
812 | 823 |
| |
813 | 824 |
| |
814 | 825 |
| |
|
Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
110 | 110 |
| |
111 | 111 |
| |
112 | 112 |
| |
113 |
| - | |
| 113 | + | |
| 114 | + | |
114 | 115 |
| |
115 | 116 |
| |
116 | 117 |
| |
|
- README.md+33-6
- benchmarks/microbenchmarks/utils.py+15-9
- ruff.toml+1
- scripts/hf_eval.py-253
- test/dtypes/test_affine_quantized.py+91-60
- test/integration/test_integration.py+14-9
- test/prototype/test_paretoq.py+52
- test/quantization/pt2e/test_duplicate_dq.py+324
- test/quantization/pt2e/test_graph_utils.py+134
- test/quantization/pt2e/test_metadata_porting.py+526
- test/quantization/pt2e/test_numeric_debugger.py+373
- test/quantization/pt2e/test_quantize_pt2e.py+2.8k
- test/quantization/pt2e/test_quantize_pt2e_qat.py+1.2k
- test/quantization/pt2e/test_representation.py+327
- test/quantization/pt2e/test_x86inductor_quantizer.py+2.9k
- test/quantization/pt2e/test_xnnpack_quantizer.py+1.1k
- test/quantization/test_quant_api.py+58-17
- test/quantization/test_quant_primitives.py+12-8
- torchao/_models/llama/generate.py+24-11
- torchao/dtypes/__init__.py+2-1
- torchao/dtypes/affine_quantized_tensor.py+6-1
- torchao/dtypes/affine_quantized_tensor_ops.py+21
- torchao/dtypes/uintx/__init__.py+4-2
- torchao/dtypes/uintx/int4_xpu_layout.py+445
- torchao/dtypes/uintx/packed_linear_int8_dynamic_activation_intx_weight_layout.py+158-148
- torchao/dtypes/uintx/q_dq_layout.py+182-5
- torchao/experimental/kernels/cpu/aarch64/matmul/channelwise_8bit_a_channelwise_8bit_b_1x8x16_f32_neondot-impl.h+9-5
- torchao/experimental/kernels/cpu/aarch64/matmul/channelwise_8bit_a_channelwise_8bit_b_4x8x8_f32_neondot-impl.h+411
- torchao/experimental/kernels/cpu/aarch64/matmul/fp32_a_input_channelwise_8bit_b_4x16x4_f32_impl.h+328
- torchao/experimental/kernels/cpu/aarch64/matmul/matmul.h+223
- torchao/experimental/kernels/cpu/aarch64/matmul/matmul_utils.h+83
- torchao/experimental/kernels/cpu/aarch64/tests/test_qmatmul.cpp+166-17
- torchao/experimental/kernels/cpu/interface/quantized_matmul.h+4-6
- torchao/experimental/kernels/cpu/interface/test_qmatmul_interface.cpp+28
- torchao/experimental/op_lib_utils.py+18
- torchao/experimental/ops/mps/mps_op_lib.py+46
- torchao/experimental/packed_linear_int8_dynamic_activation_intx_weight_layout.py-5
- torchao/experimental/quant_api.py+124-597
- torchao/experimental/quant_passes.py+20-17
- torchao/experimental/tests/test_embedding_xbit_quantizer.py+25-25
- torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py+133-83
- torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py-111
- torchao/experimental/tests/test_quant_passes.py+32-25
- torchao/kernel/intmm.py+2-2
- torchao/prototype/hqq/hqq_tinygemm_linear.py+3-3
- torchao/prototype/paretoq/1_run_train.sh+35
- torchao/prototype/paretoq/2_run_eval.sh+38
- torchao/prototype/paretoq/README.md+79
- torchao/prototype/paretoq/__init__.py
- torchao/prototype/paretoq/main_result_234bit.jpg
- torchao/prototype/paretoq/main_result_scaling_law.jpg
- torchao/prototype/paretoq/main_result_ternary.jpg
- torchao/prototype/paretoq/models/__init__.py
- torchao/prototype/paretoq/models/configuration_llama.py+231
- torchao/prototype/paretoq/models/modeling_llama_quant.py+1.2k
- torchao/prototype/paretoq/models/utils_quant.py+289
- torchao/prototype/paretoq/requirement.txt+5
- torchao/prototype/paretoq/train.py+122
- torchao/prototype/paretoq/utils/datautils.py+120
- torchao/prototype/paretoq/utils/process_args.py+94
- torchao/prototype/paretoq/utils/utils.py+58
- torchao/quantization/pt2e/__init__.py+175
- torchao/quantization/pt2e/fake_quantize.py+654
- torchao/quantization/pt2e/observer.py+2.1k
- torchao/quantization/pt2e/pt2e/__init__.py
- torchao/quantization/pt2e/pt2e/_affine_quantization.py+813
- torchao/quantization/pt2e/pt2e/_numeric_debugger.py+348
- torchao/quantization/pt2e/pt2e/constant_fold.py+410
- torchao/quantization/pt2e/pt2e/convert.py+1.4k
- torchao/quantization/pt2e/pt2e/duplicate_dq_pass.py+88
- torchao/quantization/pt2e/pt2e/export_utils.py+246
- torchao/quantization/pt2e/pt2e/graph_utils.py+186
- torchao/quantization/pt2e/pt2e/lowering.py+65
- torchao/quantization/pt2e/pt2e/port_metadata_pass.py+230
- torchao/quantization/pt2e/pt2e/prepare.py+668
- torchao/quantization/pt2e/pt2e/qat_utils.py+997
- torchao/quantization/pt2e/pt2e/representation/__init__.py+5
- torchao/quantization/pt2e/pt2e/representation/rewrite.py+835
- torchao/quantization/pt2e/pt2e/utils.py+611
- torchao/quantization/pt2e/quant_type.py+40
- torchao/quantization/pt2e/quantize_pt2e.py+282
- torchao/quantization/pt2e/quantizer/__init__.py+21
- torchao/quantization/pt2e/quantizer/composable_quantizer.py+84
- torchao/quantization/pt2e/quantizer/embedding_quantizer.py+103
- torchao/quantization/pt2e/quantizer/quantizer.py+189
- torchao/quantization/pt2e/quantizer/utils.py+89
- torchao/quantization/pt2e/quantizer/x86_inductor_quantizer.py+1.6k
- torchao/quantization/pt2e/quantizer/xnnpack_quantizer.py+454
- torchao/quantization/pt2e/quantizer/xnnpack_quantizer_utils.py+1.1k
- torchao/quantization/pt2e/quantizer/xpu_inductor_quantizer.py+131
- torchao/quantization/pt2e/utils.py+851
- torchao/quantization/quant_api.py+190-18
- torchao/quantization/quant_primitives.py+15-17
- torchao/quantization/subclass.py+91-9
- torchao/quantization/utils.py+60-12
- torchao/testing/pt2e/__init__.py
- torchao/testing/pt2e/utils.py+172
- torchao/utils.py+14
- tutorials/developer_api_guide/export_to_executorch.py+5-9
0 commit comments