@@ -168,9 +168,8 @@ G4_SendDescLdSt::G4_SendDescLdSt(
168
168
G4_Operand *surf,
169
169
ImmOff _immOff,
170
170
LdStAttrs _attrs)
171
- : G4_SendDesc(G4_SendDesc::Kind::LDST, sfid),
171
+ : G4_SendDesc(G4_SendDesc::Kind::LDST, sfid, _execSize ),
172
172
op(_op),
173
- execSize(_execSize),
174
173
//
175
174
addrType(at), addrBits(_addrBits), addrDims(_addrDims),
176
175
//
@@ -185,7 +184,7 @@ G4_SendDescLdSt::G4_SendDescLdSt(
185
184
static size_t toExecSlots (const G4_SendDescLdSt &d)
186
185
{
187
186
int minExecSize = 8 ;
188
- int execSlots = std::max ((int )d.execSize , minExecSize);
187
+ int execSlots = std::max ((int )d.getExecSize () , minExecSize);
189
188
return (size_t )execSlots;
190
189
}
191
190
@@ -586,6 +585,149 @@ bool G4_SendDescRaw::is16BitReturn() const
586
585
return desc.layout .returnFormat == 1 ;
587
586
}
588
587
588
+ bool G4_SendDescRaw::isByteScatterRW () const
589
+ {
590
+ auto funcID = getSFID ();
591
+ switch (funcID) {
592
+ case SFID::DP_DC0:
593
+ switch (getHdcMessageType ()) {
594
+ case DC_BYTE_SCATTERED_READ:
595
+ case DC_BYTE_SCATTERED_WRITE:
596
+ return true ;
597
+ default :
598
+ break ;
599
+ }
600
+ break ;
601
+ case SFID::DP_DC1:
602
+ switch (getHdcMessageType ()) {
603
+ case DC1_A64_SCATTERED_READ:
604
+ case DC1_A64_SCATTERED_WRITE:
605
+ return (getBlockSize () == 1 );
606
+ default :
607
+ break ;
608
+ }
609
+ break ;
610
+ case SFID::DP_DC2:
611
+ switch (getHdcMessageType ()) {
612
+ case DC2_A64_SCATTERED_READ:
613
+ case DC2_A64_SCATTERED_WRITE:
614
+ return (getBlockSize () == 1 );
615
+ case DC2_BYTE_SCATTERED_READ:
616
+ case DC2_BYTE_SCATTERED_WRITE:
617
+ return true ;
618
+ default :
619
+ break ;
620
+ }
621
+ break ;
622
+ default :
623
+ break ;
624
+ }
625
+ return false ;
626
+ }
627
+
628
+ bool G4_SendDescRaw::isDWScatterRW () const
629
+ {
630
+ auto funcID = getSFID ();
631
+ switch (funcID) {
632
+ case SFID::DP_DC0:
633
+ switch (getHdcMessageType ()) {
634
+ case DC_DWORD_SCATTERED_READ:
635
+ case DC_DWORD_SCATTERED_WRITE:
636
+ return true ;
637
+ default :
638
+ break ;
639
+ }
640
+ break ;
641
+ case SFID::DP_DC1:
642
+ switch (getHdcMessageType ()) {
643
+ case DC1_A64_SCATTERED_READ:
644
+ case DC1_A64_SCATTERED_WRITE:
645
+ return (getBlockSize () == 4 );
646
+ default :
647
+ break ;
648
+ }
649
+ break ;
650
+ case SFID::DP_DC2:
651
+ switch (getHdcMessageType ()) {
652
+ case DC2_A64_SCATTERED_READ:
653
+ case DC2_A64_SCATTERED_WRITE:
654
+ return (getBlockSize () == 4 );
655
+ default :
656
+ break ;
657
+ }
658
+ break ;
659
+ default :
660
+ break ;
661
+ }
662
+ return false ;
663
+ }
664
+
665
+ bool G4_SendDescRaw::isQWScatterRW () const
666
+ {
667
+ auto funcID = getSFID ();
668
+ switch (funcID) {
669
+ case SFID::DP_DC0:
670
+ switch (getHdcMessageType ()) {
671
+ default :
672
+ break ;
673
+ }
674
+ break ;
675
+ case SFID::DP_DC1:
676
+ switch (getHdcMessageType ()) {
677
+ case DC1_A64_SCATTERED_READ:
678
+ case DC1_A64_SCATTERED_WRITE:
679
+ return (getBlockSize () == 8 );
680
+ default :
681
+ break ;
682
+ }
683
+ break ;
684
+ case SFID::DP_DC2:
685
+ switch (getHdcMessageType ()) {
686
+ case DC2_A64_SCATTERED_READ:
687
+ case DC2_A64_SCATTERED_WRITE:
688
+ return (getBlockSize () == 4 );
689
+ default :
690
+ break ;
691
+ }
692
+ break ;
693
+ default :
694
+ break ;
695
+ }
696
+ return false ;
697
+ }
698
+
699
+ bool G4_SendDescRaw::isUntypedRW () const
700
+ {
701
+ auto funcID = getSFID ();
702
+ switch (funcID) {
703
+ case SFID::DP_DC1:
704
+ switch (getHdcMessageType ()) {
705
+ case DC1_UNTYPED_SURFACE_READ:
706
+ case DC1_UNTYPED_SURFACE_WRITE:
707
+ case DC1_A64_UNTYPED_SURFACE_READ:
708
+ case DC1_A64_UNTYPED_SURFACE_WRITE:
709
+ return true ;
710
+ default :
711
+ break ;
712
+ }
713
+ break ;
714
+ case SFID::DP_DC2:
715
+ switch (getHdcMessageType ()) {
716
+ case DC2_UNTYPED_SURFACE_READ:
717
+ case DC2_UNTYPED_SURFACE_WRITE:
718
+ case DC2_A64_UNTYPED_SURFACE_READ:
719
+ case DC2_A64_UNTYPED_SURFACE_WRITE:
720
+ return true ;
721
+ default :
722
+ break ;
723
+ }
724
+ break ;
725
+ default :
726
+ break ;
727
+ }
728
+ return false ;
729
+ }
730
+
589
731
bool G4_SendDescRaw::isA64Message () const
590
732
{
591
733
if (!isHDC ()) {
@@ -880,6 +1022,17 @@ size_t G4_SendDescRaw::getDstLenBytes() const
880
1022
return 32 * getScratchRWSize (); // HWords
881
1023
} else if (isOwordLoad ()) {
882
1024
return 16 * getOwordsAccessed (); // OWords
1025
+ } else if (isByteScatterRW ()) {
1026
+ uint16_t nbytes = getBlockNum ();
1027
+ // assume 4 at least
1028
+ nbytes = (nbytes >= 4 ? nbytes : 4 );
1029
+ return nbytes * getExecSize ();
1030
+ } else if (isDWScatterRW ()) {
1031
+ return 4 * getBlockNum () * getExecSize ();
1032
+ } else if (isQWScatterRW ()) {
1033
+ return 8 * getBlockNum () * getExecSize ();
1034
+ } else if (isUntypedRW ()) {
1035
+ return 4 * getEnabledChannelNum () * getExecSize ();
883
1036
} else {
884
1037
// fallback to the raw GRF count
885
1038
return ResponseLength () * (size_t )getGRFSize ();
0 commit comments