[SystemZ] Implement vector rotate in terms of funnel shift

uweigand · uweigand · commit c61eb440059d · 2023-12-04T16:52:00.000+01:00
Clang currently implements a set of vector rotate builtins
(__builtin_s390_verll*) in terms of platform-specific LLVM
intrinsics.  To simplify the IR (and allow for common code
optimizations if applicable), this patch removes those LLVM
intrinsics and implements the builtins in terms of the
platform-independent funnel shift intrinsics instead.

Also, fix the prototype of the __builtin_s390_verll*
builtins for full compatibility with GCC.
diff --git a/clang/include/clang/Basic/BuiltinsSystemZ.def b/clang/include/clang/Basic/BuiltinsSystemZ.def
@@ -105,10 +105,10 @@ TARGET_BUILTIN(__builtin_s390_verimb, "V16UcV16UcV16UcV16UcIi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_verimh, "V8UsV8UsV8UsV8UsIi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_verimf, "V4UiV4UiV4UiV4UiIi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_verimg, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllb, "V16UcV16UcUi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllh, "V8UsV8UsUi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllf, "V4UiV4UiUi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllg, "V2ULLiV2ULLiUi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllb, "V16UcV16UcUc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllh, "V8UsV8UsUc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllf, "V4UiV4UiUc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllg, "V2ULLiV2ULLiUc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_verllvb, "V16UcV16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_verllvh, "V8UsV8UsV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_verllvf, "V4UiV4UiV4Ui", "nc", "vector")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18337,6 +18337,32 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {X, Undef});
   }
 
+  case SystemZ::BI__builtin_s390_verllb:
+  case SystemZ::BI__builtin_s390_verllh:
+  case SystemZ::BI__builtin_s390_verllf:
+  case SystemZ::BI__builtin_s390_verllg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
+    // Splat scalar rotate amount to vector type.
+    unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
+    Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
+    Amt = Builder.CreateVectorSplat(NumElts, Amt);
+    Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
+    return Builder.CreateCall(F, { Src, Src, Amt });
+  }
+
+  case SystemZ::BI__builtin_s390_verllvb:
+  case SystemZ::BI__builtin_s390_verllvh:
+  case SystemZ::BI__builtin_s390_verllvf:
+  case SystemZ::BI__builtin_s390_verllvg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
+    Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
+    return Builder.CreateCall(F, { Src, Src, Amt });
+  }
+
   case SystemZ::BI__builtin_s390_vfsqsb:
   case SystemZ::BI__builtin_s390_vfsqdb: {
     llvm::Type *ResultType = ConvertType(E->getType());
diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
@@ -6565,45 +6565,45 @@ vec_rl(__vector unsigned long long __a, __vector unsigned long long __b) {
 static inline __ATTRS_o_ai __vector signed char
 vec_rli(__vector signed char __a, unsigned long __b) {
   return (__vector signed char)__builtin_s390_verllb(
-    (__vector unsigned char)__a, (int)__b);
+    (__vector unsigned char)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_rli(__vector unsigned char __a, unsigned long __b) {
-  return __builtin_s390_verllb(__a, (int)__b);
+  return __builtin_s390_verllb(__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector signed short
 vec_rli(__vector signed short __a, unsigned long __b) {
   return (__vector signed short)__builtin_s390_verllh(
-    (__vector unsigned short)__a, (int)__b);
+    (__vector unsigned short)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned short
 vec_rli(__vector unsigned short __a, unsigned long __b) {
-  return __builtin_s390_verllh(__a, (int)__b);
+  return __builtin_s390_verllh(__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector signed int
 vec_rli(__vector signed int __a, unsigned long __b) {
   return (__vector signed int)__builtin_s390_verllf(
-    (__vector unsigned int)__a, (int)__b);
+    (__vector unsigned int)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned int
 vec_rli(__vector unsigned int __a, unsigned long __b) {
-  return __builtin_s390_verllf(__a, (int)__b);
+  return __builtin_s390_verllf(__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector signed long long
 vec_rli(__vector signed long long __a, unsigned long __b) {
   return (__vector signed long long)__builtin_s390_verllg(
-    (__vector unsigned long long)__a, (int)__b);
+    (__vector unsigned long long)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned long long
 vec_rli(__vector unsigned long long __a, unsigned long __b) {
-  return __builtin_s390_verllg(__a, (int)__b);
+  return __builtin_s390_verllg(__a, (unsigned char)__b);
 }
 
 /*-- vec_rl_mask ------------------------------------------------------------*/
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
@@ -23,6 +23,7 @@ volatile vec_ulong vul;
 volatile vec_double vd;
 
 volatile unsigned int len;
+volatile unsigned char amt;
 const void * volatile cptr;
 void * volatile ptr;
 int cc;
@@ -184,23 +185,23 @@ void test_integer(void) {
   vul = __builtin_s390_verimg(vul, vul, vul, 255);
   // CHECK: call <2 x i64> @llvm.s390.verimg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 255)
 
-  vuc = __builtin_s390_verllb(vuc, len);
-  // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
-  vus = __builtin_s390_verllh(vus, len);
-  // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
-  vui = __builtin_s390_verllf(vui, len);
-  // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
-  vul = __builtin_s390_verllg(vul, len);
-  // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+  vuc = __builtin_s390_verllb(vuc, amt);
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = __builtin_s390_verllh(vus, amt);
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vui = __builtin_s390_verllf(vui, amt);
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vul = __builtin_s390_verllg(vul, amt);
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 
   vuc = __builtin_s390_verllvb(vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   vus = __builtin_s390_verllvh(vus, vus);
-  // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   vui = __builtin_s390_verllvf(vui, vui);
-  // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   vul = __builtin_s390_verllvg(vul, vul);
-  // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 
   vus = __builtin_s390_vgfmb(vuc, vuc);
   // CHECK: call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -2564,53 +2564,53 @@ void test_integer(void) {
   // (emulated)
 
   vsc = vec_rl(vsc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: verllvb
   vuc = vec_rl(vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: verllvb
   vss = vec_rl(vss, vus);
-  // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   // CHECK-ASM: verllvh
   vus = vec_rl(vus, vus);
-  // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   // CHECK-ASM: verllvh
   vsi = vec_rl(vsi, vui);
-  // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   // CHECK-ASM: verllvf
   vui = vec_rl(vui, vui);
-  // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   // CHECK-ASM: verllvf
   vsl = vec_rl(vsl, vul);
-  // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK-ASM: verllvg
   vul = vec_rl(vul, vul);
-  // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK-ASM: verllvg
 
   vsc = vec_rli(vsc, ul);
-  // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: verllb
   vuc = vec_rli(vuc, ul);
-  // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: verllb
   vss = vec_rli(vss, ul);
-  // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   // CHECK-ASM: verllh
   vus = vec_rli(vus, ul);
-  // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   // CHECK-ASM: verllh
   vsi = vec_rli(vsi, ul);
-  // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   // CHECK-ASM: verllf
   vui = vec_rli(vui, ul);
-  // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   // CHECK-ASM: verllf
   vsl = vec_rli(vsl, ul);
-  // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK-ASM: verllg
   vul = vec_rli(vul, ul);
-  // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK-ASM: verllg
 
   vsc = vec_rl_mask(vsc, vuc, 0);
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -30,10 +30,6 @@ class SystemZBinaryConv<string name, LLVMType result, LLVMType arg>
 class SystemZBinary<string name, LLVMType type>
   : SystemZBinaryConv<name, type, type>;
 
-class SystemZBinaryInt<string name, LLVMType type>
-  : ClangBuiltin<"__builtin_s390_" # name>,
-    Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>;
-
 class SystemZBinaryConvCC<LLVMType result, LLVMType arg>
   : Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>;
 
@@ -131,13 +127,6 @@ multiclass SystemZBinaryBHFG<string name> : SystemZBinaryBHF<name> {
   def g : SystemZBinary<name#"g", llvm_v2i64_ty>;
 }
 
-multiclass SystemZBinaryIntBHFG<string name> {
-  def b : SystemZBinaryInt<name#"b", llvm_v16i8_ty>;
-  def h : SystemZBinaryInt<name#"h", llvm_v8i16_ty>;
-  def f : SystemZBinaryInt<name#"f", llvm_v4i32_ty>;
-  def g : SystemZBinaryInt<name#"g", llvm_v2i64_ty>;
-}
-
 multiclass SystemZBinaryCCBHF {
   def bs : SystemZBinaryCC<llvm_v16i8_ty>;
   def hs : SystemZBinaryCC<llvm_v8i16_ty>;
@@ -303,8 +292,6 @@ let TargetPrefix = "s390" in {
   defm int_s390_vmo  : SystemZBinaryExtBHF<"vmo">;
   defm int_s390_vmlo : SystemZBinaryExtBHF<"vmlo">;
 
-  defm int_s390_verllv : SystemZBinaryBHFG<"verllv">;
-  defm int_s390_verll  : SystemZBinaryIntBHFG<"verll">;
   defm int_s390_verim  : SystemZQuaternaryIntBHFG<"verim">;
 
   def int_s390_vsl   : SystemZBinary<"vsl",   llvm_v16i8_ty>;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -385,16 +385,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
 
-      // Detect shifts by a scalar amount and convert them into
+      // Detect shifts/rotates by a scalar amount and convert them into
       // V*_BY_SCALAR.
       setOperationAction(ISD::SHL, VT, Custom);
       setOperationAction(ISD::SRA, VT, Custom);
       setOperationAction(ISD::SRL, VT, Custom);
-
-      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
-      // converted into ROTL.
-      setOperationAction(ISD::ROTL, VT, Expand);
-      setOperationAction(ISD::ROTR, VT, Expand);
+      setOperationAction(ISD::ROTL, VT, Custom);
 
       // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
       // and inverting the result as necessary.
@@ -5979,6 +5975,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
   case ISD::SRA:
     return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
+  case ISD::ROTL:
+    return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
   case ISD::IS_FPCLASS:
     return lowerIS_FPCLASS(Op, DAG);
   case ISD::GET_ROUNDING:
@@ -6143,6 +6141,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(VSHL_BY_SCALAR);
     OPCODE(VSRL_BY_SCALAR);
     OPCODE(VSRA_BY_SCALAR);
+    OPCODE(VROTL_BY_SCALAR);
     OPCODE(VSUM);
     OPCODE(VICMPE);
     OPCODE(VICMPH);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -215,11 +215,12 @@ enum NodeType : unsigned {
   UNPACK_LOW,
   UNPACKL_LOW,
 
-  // Shift each element of vector operand 0 by the number of bits specified
-  // by scalar operand 1.
+  // Shift/rotate each element of vector operand 0 by the number of bits
+  // specified by scalar operand 1.
   VSHL_BY_SCALAR,
   VSRL_BY_SCALAR,
   VSRA_BY_SCALAR,
+  VROTL_BY_SCALAR,
 
   // For each element of the output type, sum across all sub-elements of
   // operand 0 belonging to the corresponding element, and add in the
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -732,21 +732,17 @@ let Predicates = [FeatureVector] in {
 
   // Element rotate left logical (with vector shift amount).
   def VERLLV  : BinaryVRRcGeneric<"verllv", 0xE773>;
-  def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
-                           v128b, v128b, 0>;
-  def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
-                           v128h, v128h, 1>;
-  def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
-                           v128f, v128f, 2>;
-  def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
-                           v128g, v128g, 3>;
+  def VERLLVB : BinaryVRRc<"verllvb", 0xE773, rotl, v128b, v128b, 0>;
+  def VERLLVH : BinaryVRRc<"verllvh", 0xE773, rotl, v128h, v128h, 1>;
+  def VERLLVF : BinaryVRRc<"verllvf", 0xE773, rotl, v128f, v128f, 2>;
+  def VERLLVG : BinaryVRRc<"verllvg", 0xE773, rotl, v128g, v128g, 3>;
 
   // Element rotate left logical (with scalar shift amount).
   def VERLL  : BinaryVRSaGeneric<"verll", 0xE733>;
-  def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
-  def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
-  def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
-  def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
+  def VERLLB : BinaryVRSa<"verllb", 0xE733, z_vrotl_by_scalar, v128b, v128b, 0>;
+  def VERLLH : BinaryVRSa<"verllh", 0xE733, z_vrotl_by_scalar, v128h, v128h, 1>;
+  def VERLLF : BinaryVRSa<"verllf", 0xE733, z_vrotl_by_scalar, v128f, v128f, 2>;
+  def VERLLG : BinaryVRSa<"verllg", 0xE733, z_vrotl_by_scalar, v128g, v128g, 3>;
 
   // Element rotate and insert under mask.
   def VERIM  : QuaternaryVRIdGeneric<"verim", 0xE772>;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -324,6 +324,8 @@ def z_vsrl_by_scalar    : SDNode<"SystemZISD::VSRL_BY_SCALAR",
                                  SDT_ZVecBinaryInt>;
 def z_vsra_by_scalar    : SDNode<"SystemZISD::VSRA_BY_SCALAR",
                                  SDT_ZVecBinaryInt>;
+def z_vrotl_by_scalar   : SDNode<"SystemZISD::VROTL_BY_SCALAR",
+                                 SDT_ZVecBinaryInt>;
 def z_vsum              : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
 def z_vicmpe            : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
 def z_vicmph            : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
diff --git a/llvm/test/CodeGen/SystemZ/vec-intrinsics-01.ll b/llvm/test/CodeGen/SystemZ/vec-intrinsics-01.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-rot-01.ll b/llvm/test/CodeGen/SystemZ/vec-rot-01.ll
diff --git a/llvm/test/CodeGen/SystemZ/vec-rot-02.ll b/llvm/test/CodeGen/SystemZ/vec-rot-02.ll