Skip to content

Commit 08acafb

Browse files
authored
Merge pull request #1714 from cppchedy/chedy/fix-copy-in-bytes-bug
[CUDA][Bindless][Exp] Fix subregion copies
2 parents 1e9b1b4 + 8e5a33b commit 08acafb

File tree

1 file changed

+19
-14
lines changed

1 file changed

+19
-14
lines changed

source/adapters/cuda/image.cpp

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -773,9 +773,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
773773
}
774774
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
775775
CUDA_MEMCPY2D cpy_desc = {};
776-
cpy_desc.srcXInBytes = srcOffset.x;
776+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
777777
cpy_desc.srcY = srcOffset.y;
778-
cpy_desc.dstXInBytes = dstOffset.x;
778+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
779779
cpy_desc.dstY = dstOffset.y;
780780
if (pImageDesc->rowPitch == 0) {
781781
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -788,21 +788,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
788788
}
789789
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
790790
cpy_desc.dstHost = pDst;
791+
cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
791792
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width;
792793
cpy_desc.Height = copyExtent.height;
793794
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
794795
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
795796
CUDA_MEMCPY3D cpy_desc = {};
796-
cpy_desc.srcXInBytes = srcOffset.x;
797+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
797798
cpy_desc.srcY = srcOffset.y;
798799
cpy_desc.srcZ = srcOffset.z;
799-
cpy_desc.dstXInBytes = dstOffset.x;
800+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
800801
cpy_desc.dstY = dstOffset.y;
801802
cpy_desc.dstZ = dstOffset.z;
802803
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
803804
cpy_desc.srcArray = (CUarray)pSrc;
804805
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
805806
cpy_desc.dstHost = pDst;
807+
cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
808+
cpy_desc.dstHeight = hostExtent.height;
806809
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width;
807810
cpy_desc.Height = copyExtent.height;
808811
cpy_desc.Depth = copyExtent.depth;
@@ -811,16 +814,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
811814
pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
812815
pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
813816
CUDA_MEMCPY3D cpy_desc = {};
814-
cpy_desc.srcXInBytes = srcOffset.x;
817+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
815818
cpy_desc.srcY = srcOffset.y;
816819
cpy_desc.srcZ = srcOffset.z;
817-
cpy_desc.dstXInBytes = dstOffset.x;
820+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
818821
cpy_desc.dstY = dstOffset.y;
819822
cpy_desc.dstZ = dstOffset.z;
820823
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
821824
cpy_desc.srcArray = (CUarray)pSrc;
822825
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
823826
cpy_desc.dstHost = pDst;
827+
cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
828+
cpy_desc.dstHeight = hostExtent.height;
824829
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width;
825830
cpy_desc.Height = std::max(uint64_t{1}, copyExtent.height);
826831
cpy_desc.Depth = pImageDesc->arraySize;
@@ -834,9 +839,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
834839
// the end
835840
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
836841
CUDA_MEMCPY2D cpy_desc = {};
837-
cpy_desc.srcXInBytes = srcOffset.x;
842+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
838843
cpy_desc.srcY = 0;
839-
cpy_desc.dstXInBytes = dstOffset.x;
844+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
840845
cpy_desc.dstY = 0;
841846
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
842847
cpy_desc.srcArray = (CUarray)pSrc;
@@ -847,9 +852,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
847852
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
848853
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
849854
CUDA_MEMCPY2D cpy_desc = {};
850-
cpy_desc.srcXInBytes = srcOffset.x;
855+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
851856
cpy_desc.srcY = srcOffset.y;
852-
cpy_desc.dstXInBytes = dstOffset.x;
857+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
853858
cpy_desc.dstY = dstOffset.y;
854859
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
855860
cpy_desc.srcArray = (CUarray)pSrc;
@@ -860,10 +865,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
860865
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
861866
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
862867
CUDA_MEMCPY3D cpy_desc = {};
863-
cpy_desc.srcXInBytes = srcOffset.x;
868+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
864869
cpy_desc.srcY = srcOffset.y;
865870
cpy_desc.srcZ = srcOffset.z;
866-
cpy_desc.dstXInBytes = dstOffset.x;
871+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
867872
cpy_desc.dstY = dstOffset.y;
868873
cpy_desc.dstZ = dstOffset.z;
869874
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -878,10 +883,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
878883
pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
879884
pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
880885
CUDA_MEMCPY3D cpy_desc = {};
881-
cpy_desc.srcXInBytes = srcOffset.x;
886+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
882887
cpy_desc.srcY = srcOffset.y;
883888
cpy_desc.srcZ = srcOffset.z;
884-
cpy_desc.dstXInBytes = dstOffset.x;
889+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
885890
cpy_desc.dstY = dstOffset.y;
886891
cpy_desc.dstZ = dstOffset.z;
887892
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;

0 commit comments

Comments (0)