[HLSL] Support vector swizzles on scalars (#67700)

llvm-beanz · web-flow · commit 2630d72cb343 · 2023-11-29T11:25:02.000-06:00
HLSL supports vector swizzles on scalars by implicitly converting the scalar to a single-element vector. This syntax is a convienent way to initialize vectors based on filling a scalar value. There are two parts of this change. The first part in the Lexer splits numeric constant tokens when a `.x` or `.r` suffix is encountered. This splitting is a bit hacky but allows the numeric constant to be parsed separately from the vector element expression. There is an ambiguity here with the `r` suffix used by fixed point types, however fixed point types aren't supported in HLSL so this should not cause any exposable problems (a separate issue has been filed to track validating language options for HLSL: #67689). The second part of this change is in Sema::LookupMemberExpr. For HLSL, if the base type is a scalar, we implicit cast the scalar to a one-element vector then call back to perform the vector lookup. Fixes #56658 and #67511
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
@@ -2158,6 +2158,14 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
   llvm::Value *Vec = Builder.CreateLoad(LV.getExtVectorAddress(),
                                         LV.isVolatileQualified());
 
+  // HLSL allows treating scalars as one-element vectors. Converting the scalar
+  // IR value to a vector here allows the rest of codegen to behave as normal.
+  if (getLangOpts().HLSL && !Vec->getType()->isVectorTy()) {
+    llvm::Type *DstTy = llvm::FixedVectorType::get(Vec->getType(), 1);
+    llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
+    Vec = Builder.CreateInsertElement(DstTy, Vec, Zero, "cast.splat");
+  }
+
   const llvm::Constant *Elts = LV.getExtVectorElts();
 
   // If the result of the expression is a non-vector type, we must be extracting
@@ -2427,10 +2435,20 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
 
 void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
                                                                LValue Dst) {
+  // HLSL allows storing to scalar values through ExtVector component LValues.
+  // To support this we need to handle the case where the destination address is
+  // a scalar.
+  Address DstAddr = Dst.getExtVectorAddress();
+  if (!DstAddr.getElementType()->isVectorTy()) {
+    assert(!Dst.getType()->isVectorType() &&
+           "this should only occur for non-vector l-values");
+    Builder.CreateStore(Src.getScalarVal(), DstAddr, Dst.isVolatileQualified());
+    return;
+  }
+
   // This access turns into a read/modify/write of the vector.  Load the input
   // value now.
-  llvm::Value *Vec = Builder.CreateLoad(Dst.getExtVectorAddress(),
-                                        Dst.isVolatileQualified());
+  llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified());
   const llvm::Constant *Elts = Dst.getExtVectorElts();
 
   llvm::Value *SrcVal = Src.getScalarVal();
@@ -2478,7 +2496,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
       llvm_unreachable("unexpected shorten vector length");
     }
   } else {
-    // If the Src is a scalar (not a vector) it must be updating one element.
+    // If the Src is a scalar (not a vector), and the target is a vector it must
+    // be updating one element.
     unsigned InIdx = getAccessedFieldNo(0, Elts);
     llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
     Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt);
@@ -4879,7 +4898,6 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
   case CK_IntegralToPointer:
   case CK_PointerToIntegral:
   case CK_PointerToBoolean:
-  case CK_VectorSplat:
   case CK_IntegralCast:
   case CK_BooleanToSignedIntegral:
   case CK_IntegralToBoolean:
@@ -5044,6 +5062,13 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
   }
   case CK_ZeroToOCLOpaqueType:
     llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid");
+
+  case CK_VectorSplat: {
+    // LValue results of vector splats are only supported in HLSL.
+    if (!getLangOpts().HLSL)
+      return EmitUnsupportedLValue(E, "unexpected cast lvalue");
+    return EmitLValue(E->getSubExpr());
+  }
   }
 
   llvm_unreachable("Unhandled lvalue cast kind?");
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
@@ -1990,6 +1990,10 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   while (isPreprocessingNumberBody(C)) {
     CurPtr = ConsumeChar(CurPtr, Size, Result);
     PrevCh = C;
+    if (LangOpts.HLSL && C == '.' && (*CurPtr == 'x' || *CurPtr == 'r')) {
+      CurPtr -= Size;
+      break;
+    }
     C = getCharAndSize(CurPtr, Size);
   }
 
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
@@ -930,7 +930,11 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   // and FP constants (specifically, the 'pp-number' regex), and assumes that
   // the byte at "*end" is both valid and not part of the regex.  Because of
   // this, it doesn't have to check for 'overscan' in various places.
-  if (isPreprocessingNumberBody(*ThisTokEnd)) {
+  // Note: For HLSL, the end token is allowed to be '.' which would be in the
+  // 'pp-number' regex. This is required to support vector swizzles on numeric
+  // constants (i.e. 1.xx or 1.5f.rrr).
+  if (isPreprocessingNumberBody(*ThisTokEnd) &&
+      !(LangOpts.HLSL && *ThisTokEnd == '.')) {
     Diags.Report(TokLoc, diag::err_lexing_numeric);
     hadError = true;
     return;
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
@@ -1714,6 +1714,16 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R,
                             ObjCImpDecl, HasTemplateArgs, TemplateKWLoc);
   }
 
+  // HLSL supports implicit conversion of scalar types to single element vector
+  // rvalues in member expressions.
+  if (S.getLangOpts().HLSL && BaseType->isScalarType()) {
+    QualType VectorTy = S.Context.getExtVectorType(BaseType, 1);
+    BaseExpr = S.ImpCastExprToType(BaseExpr.get(), VectorTy, CK_VectorSplat,
+                                   BaseExpr.get()->getValueKind());
+    return LookupMemberExpr(S, R, BaseExpr, IsArrow, OpLoc, SS, ObjCImpDecl,
+                            HasTemplateArgs, TemplateKWLoc);
+  }
+
   S.Diag(OpLoc, diag::err_typecheck_member_reference_struct_union)
     << BaseType << BaseExpr.get()->getSourceRange() << MemberLoc;
 
diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
@@ -0,0 +1,174 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK-LABEL: ToTwoInts
+// CHECK: [[splat:%.*]] = insertelement <1 x i32> poison, i32 {{.*}}, i64 0
+// CHECK: [[vec2:%.*]] = shufflevector <1 x i32> [[splat]], <1 x i32> poison, <2 x i32> zeroinitializer
+// CHECK: ret <2 x i32> [[vec2]]
+int2 ToTwoInts(int V){
+  return V.xx;
+}
+
+// CHECK-LABEL: ToFourFloats
+// [[splat:%.*]] = insertelement <1 x float> poison, float {{.*}}, i64 0
+// [[vec4:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <4 x i32> zeroinitializer
+// ret <4 x float> [[vec4]]
+float4 ToFourFloats(float V){
+  return V.rrrr;
+}
+
+// CHECK-LABEL: FillOne
+// CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4
+// CHECK: store <1 x i32> <i32 1>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec2:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <2 x i32> zeroinitializer
+// CHECK: ret <2 x i32> [[vec2]]
+int2 FillOne(){
+  return 1.xx;
+}
+
+// CHECK-LABEL: FillOneUnsigned
+// CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4
+// CHECK: store <1 x i32> <i32 1>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec3:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <3 x i32> zeroinitializer
+// CHECK: ret <3 x i32> [[vec3]]
+uint3 FillOneUnsigned(){
+  return 1u.xxx;
+}
+
+// CHECK-LABEL: FillOneUnsignedLong
+// CHECK: [[vec1Ptr:%.*]] = alloca <1 x i64>, align 8
+// CHECK: store <1 x i64> <i64 1>, ptr [[vec1Ptr]], align 8
+// CHECK: [[vec1:%.*]] = load <1 x i64>, ptr [[vec1Ptr]], align 8
+// CHECK: [[vec4:%.*]] = shufflevector <1 x i64> [[vec1]], <1 x i64> poison, <4 x i32> zeroinitializer
+// CHECK: ret <4 x i64> [[vec4]]
+vector<uint64_t,4> FillOneUnsignedLong(){
+  return 1ul.xxxx;
+}
+
+// CHECK-LABEL: FillTwoPointFive
+// CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8
+// CHECK: store <1 x double> <double 2.500000e+00>, ptr [[vec1Ptr]], align 8
+// CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8
+// CHECK: [[vec2:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <2 x i32> zeroinitializer
+// CHECK: ret <2 x double> [[vec2]]
+double2 FillTwoPointFive(){
+  return 2.5.rr;
+}
+
+// CHECK-LABEL: FillOneHalf
+// CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8
+// CHECK: store <1 x double> <double 5.000000e-01>, ptr [[vec1Ptr]], align 8
+// CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8
+// CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer
+// CHECK: ret <3 x double> [[vec3]]
+double3 FillOneHalf(){
+  return .5.rrr;
+}
+
+// CHECK-LABEL: FillTwoPointFiveFloat
+// CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4
+// CHECK: store <1 x float> <float 2.500000e+00>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec4:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> poison, <4 x i32> zeroinitializer
+// CHECK: ret <4 x float> [[vec4]]
+float4 FillTwoPointFiveFloat(){
+  return 2.5f.rrrr;
+}
+
+// The initial codegen for this case is correct but a bit odd. The IR optimizer
+// cleans this up very nicely.
+
+// CHECK-LABEL: FillOneHalfFloat
+// CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4
+// CHECK: store <1 x float> <float 5.000000e-01>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4
+// CHECK: [[vec1Ret:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> undef, <1 x i32> zeroinitializer
+// CHECK: ret <1 x float> [[vec1Ret]]
+vector<float, 1> FillOneHalfFloat(){
+  return .5f.r;
+}
+
+// The initial codegen for this case is correct but a bit odd. The IR optimizer
+// cleans this up very nicely.
+
+// CHECK-LABEL: HowManyFloats
+// CHECK: [[VAddr:%.*]] = alloca float, align 4
+// CHECK: [[vec2Ptr:%.*]] = alloca <2 x float>, align 8
+// CHECK: [[VVal:%.*]] = load float, ptr [[VAddr]], align 4
+// CHECK: [[splat:%.*]] = insertelement <1 x float> poison, float [[VVal]], i64 0
+// CHECK: [[vec2:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <2 x i32> zeroinitializer
+// CHECK: store <2 x float> [[vec2]], ptr [[vec2Ptr]], align 8
+// CHECK: [[vec2:%.*]] = load <2 x float>, ptr [[vec2Ptr]], align 8
+// CHECK: [[vec2Res:%.*]] = shufflevector <2 x float> [[vec2]], <2 x float> poison, <2 x i32> zeroinitializer
+// CHECK: ret <2 x float> [[vec2Res]]
+float2 HowManyFloats(float V) {
+  return V.rr.rr;
+}
+
+// This codegen is gnarly because `1.` is a double, so this creates double
+// vectors that need to be truncated down to floats. The optimizer cleans this
+// up nicely too.
+
+// CHECK-LABEL: AllRighty
+// CHECK: [[XTmp:%.*]] = alloca <1 x double>, align 8
+// CHECK: [[YTmp:%.*]] = alloca <1 x double>, align 8
+// CHECK: [[ZTmp:%.*]] = alloca <1 x double>, align 8
+
+// CHECK: store <1 x double> <double 1.000000e+00>, ptr [[XTmp]], align 8
+// CHECK: [[XVec:%.*]] = load <1 x double>, ptr [[XTmp]], align 8
+// CHECK: [[XVec3:%.*]] = shufflevector <1 x double> [[XVec]], <1 x double> poison, <3 x i32> zeroinitializer
+// CHECK: [[XVal:%.*]] = extractelement <3 x double> [[XVec3]], i32 0
+// CHECK: [[XValF:%.*]] = fptrunc double [[XVal]] to float
+// CHECK: [[Vec3F1:%.*]] = insertelement <3 x float> undef, float [[XValF]], i32 0
+
+// CHECK: store <1 x double> <double 1.000000e+00>, ptr [[YTmp]], align 8
+// CHECK: [[YVec:%.*]] = load <1 x double>, ptr [[YTmp]], align 8
+// CHECK: [[YVec3:%.*]] = shufflevector <1 x double> [[YVec]], <1 x double> poison, <3 x i32> zeroinitializer
+// CHECK: [[YVal:%.*]] = extractelement <3 x double> [[YVec3]], i32 1
+// CHECK: [[YValF:%.*]] = fptrunc double [[YVal]] to float
+// CHECK: [[Vec3F2:%.*]] = insertelement <3 x float> [[Vec3F1]], float [[YValF]], i32 1
+
+// CHECK: store <1 x double> <double 1.000000e+00>, ptr [[ZTmp]], align 8
+// CHECK: [[ZVec:%.*]] = load <1 x double>, ptr [[ZTmp]], align 8
+// CHECK: [[ZVec3:%.*]] = shufflevector <1 x double> [[ZVec]], <1 x double> poison, <3 x i32> zeroinitializer
+// CHECK: [[ZVal:%.*]] = extractelement <3 x double> [[ZVec3]], i32 2
+// CHECK: [[ZValF:%.*]] = fptrunc double [[ZVal]] to float
+// CHECK: [[Vec3F3:%.*]] = insertelement <3 x float> [[Vec3F2]], float [[ZValF]], i32 2
+
+// ret <3 x float> [[Vec3F3]]
+float3 AllRighty() {
+  return 1..rrr;
+}
+
+// CHECK-LABEL: AssignInt
+// CHECK: [[VAddr:%.*]] = alloca i32, align 4
+// CHECK: [[XAddr:%.*]] = alloca i32, align 4
+
+// Load V into a vector, then extract V out and store it to X.
+// CHECK: [[V:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[V]], i64 0
+// CHECK: [[VExtVal:%.*]] = extractelement <1 x i32> [[Splat]], i32 0
+// CHECK: store i32 [[VExtVal]], ptr [[XAddr]], align 4
+
+// Load V into two separate vectors, then add the extracted X components.
+// CHECK: [[V:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[V]], i64 0
+// CHECK: [[LHS:%.*]] = extractelement <1 x i32> [[Splat]], i32 0
+
+// CHECK: [[V:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[V]], i64 0
+// CHECK: [[RHS:%.*]] = extractelement <1 x i32> [[Splat]], i32 0
+
+// CHECK: [[Sum:%.*]] = add nsw i32 [[LHS]], [[RHS]]
+// CHECK: store i32 [[Sum]], ptr [[XAddr]], align 4
+// CHECK: [[X:%.*]] = load i32, ptr [[XAddr]], align 4
+// CHECK: ret i32 [[X]]
+
+int AssignInt(int V){
+  int X = V.x;
+  X.x = V.x + V.x;
+  return X;
+}
diff --git a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library  -x hlsl -finclude-default-header -verify %s
+
+int2 ToTwoInts(int V) {
+  return V.xy; // expected-error{{vector component access exceeds type 'int __attribute__((ext_vector_type(1)))' (vector of 1 'int' value)}}
+}
+
+float2 ToTwoFloats(float V) {
+  return V.rg; // expected-error{{vector component access exceeds type 'float __attribute__((ext_vector_type(1)))' (vector of 1 'float' value)}}
+}
+
+int4 SomeNonsense(int V) {
+  return V.poop; // expected-error{{illegal vector component name 'p'}}
+}
+
+float2 WhatIsHappening(float V) {
+  return V.; // expected-error{{expected unqualified-id}}
+}
+
+// These cases produce no error.
+
+float2 HowManyFloats(float V) {
+  return V.rr.rr;
+}
+
+int64_t4 HooBoy() {
+  return 4l.xxxx;
+}
+
+float3 AllRighty() {
+  return 1..rrr;
+}
diff --git a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzles.hlsl b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzles.hlsl