@@ -632,7 +632,7 @@ Syntax:
632
632
633
633
.. code-block:: llvm
634
634
635
- declare i32 @llvm.nvvm.prmt(i32 %a , i32 %b , i32 %c )
635
+ declare i32 @llvm.nvvm.prmt(i32 %lo , i32 %hi , i32 %selector )
636
636
637
637
Overview:
638
638
"""""""""
@@ -644,7 +644,7 @@ Semantics:
644
644
""""""""""
645
645
646
646
The bytes in the first two source operands are numbered from 0 to 7:
647
- {%b , %a } = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each byte in the target
647
+ {%hi, %lo} = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each byte in the target
648
648
register, a 4-bit selection value is defined.
649
649
650
650
The 3 lsbs of the selection value specify which of the 8 source bytes should be
@@ -653,7 +653,7 @@ copied, or if the sign (msb of the byte) should be replicated over all 8 bits
653
653
of the target position (sign extend of the byte value); msb=0 means copy the
654
654
literal value; msb=1 means replicate the sign.
655
655
656
- These 4-bit selection values are pulled from the lower 16-bits of the third
656
+ These 4-bit selection values are pulled from the lower 16 bits of the %selector
657
657
operand, with the least significant selection value corresponding to the least
658
658
significant byte of the destination.
659
659
@@ -666,13 +666,13 @@ Syntax:
666
666
667
667
.. code-block:: llvm
668
668
669
- declare i32 @llvm.nvvm.prmt.f4e(i32 %a , i32 %b , i32 %c )
670
- declare i32 @llvm.nvvm.prmt.b4e(i32 %a , i32 %b , i32 %c )
669
+ declare i32 @llvm.nvvm.prmt.f4e(i32 %lo , i32 %hi , i32 %selector )
670
+ declare i32 @llvm.nvvm.prmt.b4e(i32 %lo , i32 %hi , i32 %selector )
671
671
672
- declare i32 @llvm.nvvm.prmt.rc8(i32 %a , i32 %c )
673
- declare i32 @llvm.nvvm.prmt.ecl(i32 %a , i32 %c )
674
- declare i32 @llvm.nvvm.prmt.ecr(i32 %a , i32 %c )
675
- declare i32 @llvm.nvvm.prmt.rc16(i32 %a , i32 %c )
672
+ declare i32 @llvm.nvvm.prmt.rc8(i32 %lo , i32 %selector )
673
+ declare i32 @llvm.nvvm.prmt.ecl(i32 %lo , i32 %selector )
674
+ declare i32 @llvm.nvvm.prmt.ecr(i32 %lo , i32 %selector )
675
+ declare i32 @llvm.nvvm.prmt.rc16(i32 %lo , i32 %selector )
676
676
677
677
Overview:
678
678
"""""""""
@@ -685,64 +685,64 @@ Semantics:
685
685
""""""""""
686
686
687
687
As with the generic '``llvm.nvvm.prmt``' intrinsic, the bytes in the first one
688
- or two source operands are numbered. The first source operand (%a ) is numbered
688
+ or two source operands are numbered. The first source operand (%lo) is numbered
689
689
{b3, b2, b1, b0}; in the case of the '``f4e``' and '``b4e``' variants, the
690
- second source operand (%b ) is numbered {b7, b6, b5, b4}.
691
-
692
- Depending on the 2 least significant bits of the final operand, the result of
693
- the permutation is defined as follows:
694
-
695
- +------------+---------+--------------+
696
- | Mode | %c [1:0] | Output |
697
- +------------+---------+--------------+
698
- | '``f4e ``' | 0 | {3, 2, 1, 0} |
699
- | +---------+--------------+
700
- | | 1 | {4, 3, 2, 1} |
701
- | +---------+--------------+
702
- | | 2 | {5, 4, 3, 2} |
703
- | +---------+--------------+
704
- | | 3 | {6, 5, 4, 3} |
705
- +------------+---------+--------------+
706
- | '``b4e ``' | 0 | {5, 6, 7, 0} |
707
- | +---------+--------------+
708
- | | 1 | {6, 7, 0, 1} |
709
- | +---------+--------------+
710
- | | 2 | {7, 0, 1, 2} |
711
- | +---------+--------------+
712
- | | 3 | {0, 1, 2, 3} |
713
- +------------+---------+--------------+
714
- | '``rc8 ``' | 0 | {0, 0, 0, 0} |
715
- | +---------+--------------+
716
- | | 1 | {1, 1, 1, 1} |
717
- | +---------+--------------+
718
- | | 2 | {2, 2, 2, 2} |
719
- | +---------+--------------+
720
- | | 3 | {3, 3, 3, 3} |
721
- +------------+---------+--------------+
722
- | '``ecl ``' | 0 | {3, 2, 1, 0} |
723
- | +---------+--------------+
724
- | | 1 | {3, 2, 1, 1} |
725
- | +---------+--------------+
726
- | | 2 | {3, 2, 2, 2} |
727
- | +---------+--------------+
728
- | | 3 | {3, 3, 3, 3} |
729
- +------------+---------+--------------+
730
- | '``ecr ``' | 0 | {0, 0, 0, 0} |
731
- | +---------+--------------+
732
- | | 1 | {1, 1, 1, 0} |
733
- | +---------+--------------+
734
- | | 2 | {2, 2, 1, 0} |
735
- | +---------+--------------+
736
- | | 3 | {3, 2, 1, 0} |
737
- +------------+---------+--------------+
738
- | '``rc16 ``' | 0 | {1, 0, 1, 0} |
739
- | +---------+--------------+
740
- | | 1 | {3, 2, 3, 2} |
741
- | +---------+--------------+
742
- | | 2 | {1, 0, 1, 0} |
743
- | +---------+--------------+
744
- | | 3 | {3, 2, 3, 2} |
745
- +------------+---------+--------------+
690
+ second source operand (%hi) is numbered {b7, b6, b5, b4}.
691
+
692
+ Depending on the 2 least significant bits of the %selector operand, the result
693
+ of the permutation is defined as follows:
694
+
695
+ +------------+---------------- +--------------+
696
+ | Mode | %selector [1:0] | Output |
697
+ +------------+---------------- +--------------+
698
+ | '``f4e ``' | 0 | {3, 2, 1, 0} |
699
+ | +---------------- +--------------+
700
+ | | 1 | {4, 3, 2, 1} |
701
+ | +---------------- +--------------+
702
+ | | 2 | {5, 4, 3, 2} |
703
+ | +---------------- +--------------+
704
+ | | 3 | {6, 5, 4, 3} |
705
+ +------------+---------------- +--------------+
706
+ | '``b4e ``' | 0 | {5, 6, 7, 0} |
707
+ | +---------------- +--------------+
708
+ | | 1 | {6, 7, 0, 1} |
709
+ | +---------------- +--------------+
710
+ | | 2 | {7, 0, 1, 2} |
711
+ | +---------------- +--------------+
712
+ | | 3 | {0, 1, 2, 3} |
713
+ +------------+---------------- +--------------+
714
+ | '``rc8 ``' | 0 | {0, 0, 0, 0} |
715
+ | +---------------- +--------------+
716
+ | | 1 | {1, 1, 1, 1} |
717
+ | +---------------- +--------------+
718
+ | | 2 | {2, 2, 2, 2} |
719
+ | +---------------- +--------------+
720
+ | | 3 | {3, 3, 3, 3} |
721
+ +------------+---------------- +--------------+
722
+ | '``ecl ``' | 0 | {3, 2, 1, 0} |
723
+ | +---------------- +--------------+
724
+ | | 1 | {3, 2, 1, 1} |
725
+ | +---------------- +--------------+
726
+ | | 2 | {3, 2, 2, 2} |
727
+ | +---------------- +--------------+
728
+ | | 3 | {3, 3, 3, 3} |
729
+ +------------+---------------- +--------------+
730
+ | '``ecr ``' | 0 | {0, 0, 0, 0} |
731
+ | +---------------- +--------------+
732
+ | | 1 | {1, 1, 1, 0} |
733
+ | +---------------- +--------------+
734
+ | | 2 | {2, 2, 1, 0} |
735
+ | +---------------- +--------------+
736
+ | | 3 | {3, 2, 1, 0} |
737
+ +------------+---------------- +--------------+
738
+ | '``rc16 ``' | 0 | {1, 0, 1, 0} |
739
+ | +---------------- +--------------+
740
+ | | 1 | {3, 2, 3, 2} |
741
+ | +---------------- +--------------+
742
+ | | 2 | {1, 0, 1, 0} |
743
+ | +---------------- +--------------+
744
+ | | 3 | {3, 2, 3, 2} |
745
+ +------------+---------------- +--------------+
746
746
747
747
TMA family of Intrinsics
748
748
------------------------
0 commit comments