Skip to content

Commit 12fb1c5

Browse files
committed
cuda : tweak mm stride to double perf on P40 + GTX 970
1 parent 3e73d31 commit 12fb1c5

File tree

1 file changed: 1 addition (+1) and 1 deletion (-1).

ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA
467 467   #define GGML_CUDA_PEER_MAX_BATCH_SIZE 128
468 468   #endif // GGML_CUDA_PEER_MAX_BATCH_SIZE
469 469
470     - #define MUL_MAT_SRC1_COL_STRIDE 128
    470 + #define MUL_MAT_SRC1_COL_STRIDE 4096
471 471
472 472   #define MAX_STREAMS 8
473 473   static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_DEVICES][MAX_STREAMS] = { { nullptr } };

0 commit comments

Comments
 (0)