@@ -773,9 +773,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
773
773
}
774
774
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
775
775
CUDA_MEMCPY2D cpy_desc = {};
776
- cpy_desc.srcXInBytes = srcOffset.x ;
776
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
777
777
cpy_desc.srcY = srcOffset.y ;
778
- cpy_desc.dstXInBytes = dstOffset.x ;
778
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
779
779
cpy_desc.dstY = dstOffset.y ;
780
780
if (pImageDesc->rowPitch == 0 ) {
781
781
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -788,21 +788,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
788
788
}
789
789
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
790
790
cpy_desc.dstHost = pDst;
791
+ cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
791
792
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
792
793
cpy_desc.Height = copyExtent.height ;
793
794
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
794
795
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
795
796
CUDA_MEMCPY3D cpy_desc = {};
796
- cpy_desc.srcXInBytes = srcOffset.x ;
797
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
797
798
cpy_desc.srcY = srcOffset.y ;
798
799
cpy_desc.srcZ = srcOffset.z ;
799
- cpy_desc.dstXInBytes = dstOffset.x ;
800
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
800
801
cpy_desc.dstY = dstOffset.y ;
801
802
cpy_desc.dstZ = dstOffset.z ;
802
803
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
803
804
cpy_desc.srcArray = (CUarray)pSrc;
804
805
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
805
806
cpy_desc.dstHost = pDst;
807
+ cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
808
+ cpy_desc.dstHeight = hostExtent.height ;
806
809
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
807
810
cpy_desc.Height = copyExtent.height ;
808
811
cpy_desc.Depth = copyExtent.depth ;
@@ -811,16 +814,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
811
814
pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
812
815
pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
813
816
CUDA_MEMCPY3D cpy_desc = {};
814
- cpy_desc.srcXInBytes = srcOffset.x ;
817
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
815
818
cpy_desc.srcY = srcOffset.y ;
816
819
cpy_desc.srcZ = srcOffset.z ;
817
- cpy_desc.dstXInBytes = dstOffset.x ;
820
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
818
821
cpy_desc.dstY = dstOffset.y ;
819
822
cpy_desc.dstZ = dstOffset.z ;
820
823
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
821
824
cpy_desc.srcArray = (CUarray)pSrc;
822
825
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
823
826
cpy_desc.dstHost = pDst;
827
+ cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
828
+ cpy_desc.dstHeight = hostExtent.height ;
824
829
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
825
830
cpy_desc.Height = std::max (uint64_t {1 }, copyExtent.height );
826
831
cpy_desc.Depth = pImageDesc->arraySize ;
@@ -834,9 +839,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
834
839
// the end
835
840
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
836
841
CUDA_MEMCPY2D cpy_desc = {};
837
- cpy_desc.srcXInBytes = srcOffset.x ;
842
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
838
843
cpy_desc.srcY = 0 ;
839
- cpy_desc.dstXInBytes = dstOffset.x ;
844
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
840
845
cpy_desc.dstY = 0 ;
841
846
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
842
847
cpy_desc.srcArray = (CUarray)pSrc;
@@ -847,9 +852,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
847
852
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
848
853
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
849
854
CUDA_MEMCPY2D cpy_desc = {};
850
- cpy_desc.srcXInBytes = srcOffset.x ;
855
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
851
856
cpy_desc.srcY = srcOffset.y ;
852
- cpy_desc.dstXInBytes = dstOffset.x ;
857
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
853
858
cpy_desc.dstY = dstOffset.y ;
854
859
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
855
860
cpy_desc.srcArray = (CUarray)pSrc;
@@ -860,10 +865,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
860
865
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
861
866
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
862
867
CUDA_MEMCPY3D cpy_desc = {};
863
- cpy_desc.srcXInBytes = srcOffset.x ;
868
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
864
869
cpy_desc.srcY = srcOffset.y ;
865
870
cpy_desc.srcZ = srcOffset.z ;
866
- cpy_desc.dstXInBytes = dstOffset.x ;
871
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
867
872
cpy_desc.dstY = dstOffset.y ;
868
873
cpy_desc.dstZ = dstOffset.z ;
869
874
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -878,10 +883,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
878
883
pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
879
884
pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
880
885
CUDA_MEMCPY3D cpy_desc = {};
881
- cpy_desc.srcXInBytes = srcOffset.x ;
886
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
882
887
cpy_desc.srcY = srcOffset.y ;
883
888
cpy_desc.srcZ = srcOffset.z ;
884
- cpy_desc.dstXInBytes = dstOffset.x ;
889
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
885
890
cpy_desc.dstY = dstOffset.y ;
886
891
cpy_desc.dstZ = dstOffset.z ;
887
892
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
0 commit comments