1 file changed
+1
-1
lines changed- .github/workflows/dashboard_perf_test.yml+28-5
- .github/workflows/ruff_linter.yml+4-1
- dev-requirements.txt+8-1
- examples/sam2_amg_server/server.py+31-7
- scripts/convert_hf_checkpoint.py+3-3
- scripts/download_sam2_ckpts.sh+68
- scripts/run_ruff_fix.sh+6
- test/dtypes/test_affine_quantized.py+15-3
- test/dtypes/test_affine_quantized_tensor_parallel.py+28
- test/integration/test_integration.py+43
- test/prototype/test_codebook_quant.py+67
- test/quantization/test_qat.py+153
- test/quantization/test_quant_primitives.py+100-61
- test/sparsity/test_wanda.py+33
- torchao/_models/llama/benchmarks.sh+15
- torchao/_models/llama/eval.py+4
- torchao/_models/llama/generate.py+63-62
- torchao/_models/llama/model.py+4-1
- torchao/_models/sam/eval_combo.py+20-1
- torchao/_models/sam/setup.sh-1
- torchao/_models/utils.py+98
- torchao/dtypes/affine_quantized_tensor.py+13-2
- torchao/dtypes/affine_quantized_tensor_ops.py+8
- torchao/dtypes/uintx/__init__.py+3-1
- torchao/dtypes/uintx/gemlite_layout.py+387
- torchao/dtypes/uintx/int4_cpu_layout.py+263
- torchao/dtypes/uintx/tensor_core_tiled_layout.py-248
- torchao/experimental/kernels/mps/metal.yaml+7-4
- torchao/experimental/kernels/mps/metal/common.metal+15
- torchao/experimental/kernels/mps/metal/divbit.metal-109
- torchao/experimental/kernels/mps/metal/int1mm.metal+29-32
- torchao/experimental/kernels/mps/metal/int2mm_opt.metal+138
- torchao/experimental/kernels/mps/metal/int3mm_opt.metal+147
- torchao/experimental/kernels/mps/metal/int4mm_opt.metal+180
- torchao/experimental/kernels/mps/metal/int5mm.metal+1-1
- torchao/experimental/kernels/mps/metal/int6mm.metal+21-2
- torchao/experimental/kernels/mps/metal/int7mm.metal+1-1
- torchao/experimental/kernels/mps/src/dispatch.h+14
- torchao/experimental/kernels/mps/src/lowbit.h+4-3
- torchao/experimental/kernels/mps/test/test_lowbit.mm+17-16
- torchao/experimental/ops/mps/CMakeLists.txt+2-1
- torchao/experimental/ops/mps/linear_fp_act_xbit_weight_aten.mm+2
- torchao/experimental/ops/mps/linear_fp_act_xbit_weight_executorch.mm+2
- torchao/experimental/ops/mps/test/test_lowbit.py+17-16
- torchao/experimental/ops/mps/test/test_quantizer.py+4-4
- torchao/float8/fsdp_utils.py+1-1
- torchao/prototype/float8nocompile/.gitignore+1
- torchao/prototype/float8nocompile/README.md+3
- torchao/prototype/float8nocompile/examples/example.py+33
- torchao/prototype/float8nocompile/float8nocompile_linear.py+146
- torchao/prototype/float8nocompile/float8nocompile_linear_utils.py+46
- torchao/prototype/float8nocompile/float8nocompile_scaling_utils.py+61
- torchao/prototype/quantization/autoquant_v2.py+1-1
- torchao/prototype/quantization/codebook/__init__.py+14
- torchao/prototype/quantization/codebook/codebook_ops.py+438
- torchao/prototype/quantization/codebook/codebook_quantized_tensor.py+288
- torchao/prototype/quantization/subgraph_utils/__init__.py
- torchao/quantization/README.md+29-1
- torchao/quantization/__init__.py+5-1
- torchao/quantization/autoquant.py+11-7
- torchao/quantization/qat/__init__.py+4
- torchao/quantization/qat/api.py+57-1
- torchao/quantization/qat/embedding.py+28-3
- torchao/quantization/qat/linear.py+22
- torchao/quantization/quant_api.py+64-5
- torchao/quantization/quant_primitives.py+29-5
- torchao/quantization/utils.py+4-7
- torchao/sparsity/wanda.py+22-4
0 commit comments