
Commit a45fd43

[dfsan] Fix origin tracking for fast8
The problem is the following. With fast8, we broke an important invariant when loading shadows. A wide shadow of 64 bits used to correspond to 4 application bytes with fast16, so generating a single origin load was okay since those 4 application bytes would share a single origin. Now, using fast8, a wide shadow of 64 bits corresponds to 8 application bytes that should be backed by 2 origins (but we kept generating just one).

Let's say our wide shadow is 64-bit and consists of the following: 0xABCDEFGH. To check whether we need the second origin value, we do the following (in the 64-bit wide-shadow case):

- bitwise shift the wide shadow left by 32 bits (yielding 0xEFGH0000)
- push the result along with the first origin load to the shadow/origin vectors
- load the second 32-bit origin of the 64-bit wide shadow
- push the wide shadow along with the second origin to the shadow/origin vectors

combineOrigins() then selects the second origin if the wide shadow is of the form 0xABCD0000, i.e. the least-significant half is empty but the other half is not.

The tests illustrate how this change affects the generated bitcode.

Reviewed By: stephan.yichao.zhao

Differential Revision: https://reviews.llvm.org/D101584
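To make the selection rule above concrete, here is a minimal standalone C++ sketch. It is a scalar model only, not DFSan's runtime or IRBuilder API, and the names (pickOrigin, OriginLo, OriginHi) are invented for illustration.

#include <cstdint>
#include <cstdio>

// Hypothetical model of the rule described in the commit message. Under
// fast8 each application byte gets a 1-byte shadow, so a 64-bit wide shadow
// covers 8 application bytes backed by two 32-bit origin slots: OriginLo for
// the first 4 bytes (low shadow half), OriginHi for the next 4 bytes.
static uint32_t pickOrigin(uint64_t WideShadow, uint32_t OriginLo,
                           uint32_t OriginHi) {
  // Shifting left by 32 keeps only the shadows of the first 4 application
  // bytes, mirroring the WideShadowLo value produced by the pass.
  uint64_t WideShadowLo = WideShadow << 32;
  if (WideShadowLo != 0)
    return OriginLo; // the first half is tainted, so its origin wins
  if (WideShadow != 0)
    return OriginHi; // only the second half is tainted
  return 0;          // nothing is tainted; the origin is irrelevant
}

int main() {
  // A shadow of the form 0xABCD0000...: the low half is clean but the high
  // half is tainted, so the second origin must be reported (the case this
  // commit fixes).
  printf("%u\n", pickOrigin(0xABCD000000000000ULL, 11, 22)); // prints 22
  printf("%u\n", pickOrigin(0x00000000000000FFULL, 11, 22)); // prints 11
  return 0;
}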

File tree: 2 files changed, +62 −15 lines


llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp

Lines changed: 42 additions & 12 deletions
@@ -505,6 +505,11 @@ class DataFlowSanitizer {
 
   bool init(Module &M);
 
+  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
+  /// from it. Returns the origin's loaded value.
+  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
+                        Value **OriginAddr);
+
   /// Returns whether fast8 or fast16 mode has been specified.
   bool hasFastLabelsEnabled();
 
@@ -2094,6 +2099,14 @@ bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
   return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
 }
 
+Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
+                                         Value **OriginAddr) {
+  IRBuilder<> IRB(Pos);
+  *OriginAddr =
+      IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
+  return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
+}
+
 std::pair<Value *, Value *> DFSanFunction::loadFast16ShadowFast(
     Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
     Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
@@ -2125,19 +2138,39 @@ std::pair<Value *, Value *> DFSanFunction::loadFast16ShadowFast(
   Value *CombinedWideShadow =
       IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
 
-  if (ShouldTrackOrigins) {
-    Shadows.push_back(CombinedWideShadow);
-    Origins.push_back(FirstOrigin);
-  }
+  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
+  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
+
+  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
+    if (BytesPerWideShadow > 4) {
+      assert(BytesPerWideShadow == 8);
+      // The wide shadow relates to two origin pointers: one for the first four
+      // application bytes, and one for the latest four. We use a left shift to
+      // get just the shadow bytes that correspond to the first origin pointer,
+      // and then the entire shadow for the second origin pointer (which will be
+      // chosen by combineOrigins() iff the least-significant half of the wide
+      // shadow was empty but the other half was not).
+      Value *WideShadowLo = IRB.CreateShl(
+          WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
+      Shadows.push_back(WideShadow);
+      Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
+
+      Shadows.push_back(WideShadowLo);
+      Origins.push_back(Origin);
+    } else {
+      Shadows.push_back(WideShadow);
+      Origins.push_back(Origin);
+    }
+  };
+
+  if (ShouldTrackOrigins)
+    AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
 
   // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
   // then OR individual shadows within the combined WideShadow by binary ORing.
   // This is fewer instructions than ORing shadows individually, since it
   // needs logN shift/or instructions (N being the bytes of the combined wide
   // shadow).
-  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
-  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
-
   for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
        ByteOfs += BytesPerWideShadow) {
     WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
@@ -2146,11 +2179,8 @@ std::pair<Value *, Value *> DFSanFunction::loadFast16ShadowFast(
         IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
     CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
     if (ShouldTrackOrigins) {
-      Shadows.push_back(NextWideShadow);
-      OriginAddr = IRB.CreateGEP(DFS.OriginTy, OriginAddr,
-                                 ConstantInt::get(DFS.IntptrTy, 1));
-      Origins.push_back(
-          IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign));
+      Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
+      AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
     }
   }
   for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;

llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll

Lines changed: 20 additions & 3 deletions
@@ -191,13 +191,18 @@ define i64 @load64(i64* %p) {
 ; CHECK16-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_NZ]], i32 %[[#ORIGIN2]], i32 %[[#ORIGIN]]
 
 ; COMM: On fast8, no need to OR the wide shadow but one more shift is needed.
+; CHECK8-NEXT: %[[#WIDE_SHADOW_LO:]] = shl i64 %[[#WIDE_SHADOW]], 32
+; CHECK8-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
+; CHECK8-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 32
 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 16
 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 8
 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
 ; CHECK8-NEXT: %[[#SHADOW:]] = trunc i64 %[[#WIDE_SHADOW]] to i[[#SBITS]]
+; CHECK8-NEXT: %[[#SHADOW_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW_LO]], 0
+; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_NZ]], i32 %[[#ORIGIN]], i32 %[[#ORIGIN2]]
 
 ; COMBINE_LOAD_PTR-NEXT: %[[#SHADOW:]] = or i[[#SBITS]] %[[#SHADOW]], %[[#PS]]
 ; COMBINE_LOAD_PTR-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0
@@ -250,13 +255,16 @@ define i128 @load128(i128* %p) {
 ; CHECK-NEXT: %[[#ORIGIN:]] = load i32, i32* %[[#ORIGIN_PTR]], align 8
 ; CHECK-NEXT: %[[#WIDE_SHADOW_PTR:]] = bitcast i[[#SBITS]]* %[[#SHADOW_PTR]] to i64*
 ; CHECK-NEXT: %[[#WIDE_SHADOW:]] = load i64, i64* %[[#WIDE_SHADOW_PTR]], align [[#SBYTES]]
+; CHECK8-NEXT: %[[#WIDE_SHADOW_LO:]] = shl i64 %[[#WIDE_SHADOW]], 32
+; CHECK8-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
+; CHECK8-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
 ; CHECK-NEXT: %[[#WIDE_SHADOW_PTR2:]] = getelementptr i64, i64* %[[#WIDE_SHADOW_PTR]], i64 1
 ; CHECK-NEXT: %[[#WIDE_SHADOW2:]] = load i64, i64* %[[#WIDE_SHADOW_PTR2]], align [[#SBYTES]]
 ; CHECK-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW2]]
-; CHECK-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
-; CHECK-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
 
 ; COMM: On fast16, we need to OR 4x64bits for the wide shadow, before ORing its bytes.
+; CHECK16-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
+; CHECK16-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
 ; CHECK16-NEXT: %[[#WIDE_SHADOW_PTR3:]] = getelementptr i64, i64* %[[#WIDE_SHADOW_PTR2]], i64 1
 ; CHECK16-NEXT: %[[#WIDE_SHADOW3:]] = load i64, i64* %[[#WIDE_SHADOW_PTR3]], align [[#SBYTES]]
 ; CHECK16-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW3]]
@@ -280,15 +288,24 @@ define i128 @load128(i128* %p) {
 ; CHECK16-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW4_NZ]], i32 %[[#ORIGIN4]], i32 %[[#ORIGIN]]
 
 ; COMM: On fast8, we need to OR 2x64bits for the wide shadow, before ORing its bytes (one more shift).
+; CHECK8-NEXT: %[[#ORIGIN_PTR3:]] = getelementptr i32, i32* %[[#ORIGIN_PTR2]], i64 1
+; CHECK8-NEXT: %[[#ORIGIN3:]] = load i32, i32* %[[#ORIGIN_PTR3]], align 8
+; CHECK8-NEXT: %[[#WIDE_SHADOW2_LO:]] = shl i64 %[[#WIDE_SHADOW2]], 32
+; CHECK8-NEXT: %[[#ORIGIN_PTR4:]] = getelementptr i32, i32* %[[#ORIGIN_PTR3]], i64 1
+; CHECK8-NEXT: %[[#ORIGIN4:]] = load i32, i32* %[[#ORIGIN_PTR4]], align 8
 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 32
 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 16
 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 8
 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
 ; CHECK8-NEXT: %[[#SHADOW:]] = trunc i64 %[[#WIDE_SHADOW]] to i[[#SBITS]]
+; CHECK8-NEXT: %[[#SHADOW_LO_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW_LO]], 0
+; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_LO_NZ]], i32 %[[#ORIGIN]], i32 %[[#ORIGIN2]]
 ; CHECK8-NEXT: %[[#SHADOW2_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW2]], 0
-; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_NZ]], i32 %[[#ORIGIN2]], i32 %[[#ORIGIN]]
+; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_NZ]], i32 %[[#ORIGIN4]], i32 %[[#ORIGIN]]
+; CHECK8-NEXT: %[[#SHADOW2_LO_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW2_LO]], 0
+; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_LO_NZ]], i32 %[[#ORIGIN3]], i32 %[[#ORIGIN]]
 
 ; COMBINE_LOAD_PTR-NEXT: %[[#SHADOW:]] = or i[[#SBITS]] %[[#SHADOW]], %[[#PS]]
 ; COMBINE_LOAD_PTR-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0
