#include <stdio.h>

int input() {
    int x;
    scanf("%d", &x);
    return x;
}

int main() {
    int a = input(), b = input(), m = input(), n = input();
    int y, c;
    if (b) {
        c = m;
        y = a + n;
    } else {
        c = n;
        y = a + m;
    }
    return y + c;
}
In the example above, y + c has the same value on both branches: one path computes (a + n) + m and the other (a + m) + n, so the two branches do redundant work. LLVM's optimizer does not fold this into a single a + m + n, however; it merely flattens the branch into select instructions.
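At the source level, the optimized output is roughly equivalent to the sketch below (a hand-written approximation of what the select-based code computes, not the compiler's actual output; the function names are made up, and the "ideal" form ignores signed-overflow subtleties):

/* Roughly what the optimizer produces for the first example, rewritten as C:
 * the branch becomes two selects, but y + c is still computed from the two
 * selected values instead of being folded to a + m + n. */
int select_version(int a, int b, int m, int n) {
    int c = b ? m : n;
    int y = b ? a + n : a + m;
    return y + c;
}

/* What one would hope for instead: */
int ideal_version(int a, int b, int m, int n) {
    (void)b; /* the branch no longer matters */
    return a + m + n;
}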
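To make the impact of the problem more visible, I constructed a second example that exhibits the same issue inside a loop.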
#include <stdio.h>

int input() {
    int x;
    scanf("%d", &x);
    return x;
}

int main() {
    int a = input(), b = input(), m = input(), n = input();
    int y, c, z;
    for (int i = 0; i < 1000; i++) {
        if (b) {
            c = m;
            y = a + n;
            b = input();
        } else {
            c = n;
            y = a + m;
            b = input();
        }
        z = y + c;
    }
    return z;
}
The LLVM IR after optimization is shown below (only main is shown):
define dso_local noundef i32 @main() local_unnamed_addr {
entry:
%x.i20 = alloca i32, align 4
%x.i18 = alloca i32, align 4
%x.i16 = alloca i32, align 4
%x.i14 = alloca i32, align 4
%x.i12 = alloca i32, align 4
%x.i = alloca i32, align 4
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x.i) #6
%call.i = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef nonnull @.str, ptr noundef nonnull %x.i)
%0 = load i32, ptr %x.i, align 4
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x.i) #6
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x.i12) #6
%call.i13 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef nonnull @.str, ptr noundef nonnull %x.i12)
%1 = load i32, ptr %x.i12, align 4
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x.i12) #6
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x.i14) #6
%call.i15 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef nonnull @.str, ptr noundef nonnull %x.i14)
%2 = load i32, ptr %x.i14, align 4
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x.i14) #6
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x.i16) #6
%call.i17 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef nonnull @.str, ptr noundef nonnull %x.i16)
%3 = load i32, ptr %x.i16, align 4
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x.i16) #6
br label %for.body
for.cond.cleanup:
%y.0.le = add nsw i32 %call3.pn, %0
%add7.le = add nsw i32 %y.0.le, %c.0
ret i32 %add7.le
for.body:
%i.023 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
%b.022 = phi i32 [ %1, %entry ], [ %b.1, %if.end ]
%tobool.not = icmp eq i32 %b.022, 0
br i1 %tobool.not, label %if.else, label %if.then
if.then:
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x.i18) #6
%call.i19 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef nonnull @.str, ptr noundef nonnull %x.i18)
%4 = load i32, ptr %x.i18, align 4
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x.i18) #6
br label %if.end
if.else:
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x.i20) #6
%call.i21 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef nonnull @.str, ptr noundef nonnull %x.i20)
%5 = load i32, ptr %x.i20, align 4
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x.i20) #6
br label %if.end
if.end:
%b.1 = phi i32 [ %4, %if.then ], [ %5, %if.else ]
%call3.pn = phi i32 [ %3, %if.then ], [ %2, %if.else ]
%c.0 = phi i32 [ %2, %if.then ], [ %3, %if.else ]
%inc = add nuw nsw i32 %i.023, 1
%exitcond.not = icmp eq i32 %inc, 1000
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
Here is a Compiler Explorer link for the example.
As the IR shows, LLVM never notices that the two phi nodes can be reassociated: %call3.pn and %c.0 always sum to %2 + %3, so %add7.le is simply %0 + %2 + %3 no matter which branch was taken. The redundant computation is therefore not eliminated.
At first I assumed the relation between the two expressions is not a Herbrand equivalence and therefore cannot be recognized by a GVN-style algorithm, but on reflection, if the two phi nodes could be combined, the redundant code would be eliminated directly.
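To make that claim concrete, here is a hand-optimized version of the loop example (a sketch that reuses the input() helper from above and ignores signed-overflow subtleties). Since both arms of the if/else call input() and y + c equals a + m + n either way, the whole branch collapses:

int input(void); /* the helper defined in the examples above */

int main(void) {
    int a = input(), b = input(), m = input(), n = input();
    /* The branch only decided which of two identical input() calls ran,
     * so the loop body reduces to reading the next b. */
    for (int i = 0; i < 1000; i++)
        b = input();
    (void)b;
    return a + m + n; /* z = y + c is loop-invariant */
}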
Judging from its header comment, this pass, EarlyCSE, mainly eliminates trivially redundant instructions, i.e. the "trivial" redundancies; from a quick look, this is also the step where the re-association case above would have to be caught.
The later instcombine and GVN passes handle the remaining cases.
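For contrast, the following is the kind of trivial redundancy EarlyCSE is built to remove: a repeated expression that is syntactically identical to one that dominates it (a minimal sketch with a made-up function name):

int twice(int a, int b) {
    int x = a * b;
    int y = a * b; /* same opcode and operands, dominated by the first
                      computation: EarlyCSE replaces it with x */
    return x + y;
}

With that context, here is the pass's entry point under the new PassManager: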
PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}
The code above is the main body of the EarlyCSE pass under the new PassManager. It first obtains several analysis results for the current function, constructs an EarlyCSE object, and calls its run method. If run made no changes, PreservedAnalyses::all() is returned, meaning every cached analysis result stays valid. Otherwise the pass reports that the CFG analyses are still preserved (EarlyCSE never changes the control-flow graph) and, when MemorySSA is in use, that MemorySSAAnalysis is preserved as well; every other analysis is invalidated and will be recomputed on demand.
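For reference, the same new-PassManager pattern in isolation might look like the sketch below (MyPass and transform are hypothetical names, not part of LLVM): request the analyses you need, run the transform, and report exactly what you kept intact.

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical pass illustrating the PreservedAnalyses protocol.
struct MyPass : PassInfoMixin<MyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
    bool Changed = transform(F, DT); // hypothetical transformation
    if (!Changed)
      return PreservedAnalyses::all(); // nothing was touched
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();     // we did not modify the CFG
    return PA;
  }
  bool transform(Function &F, DominatorTree &DT); // defined elsewhere
};

The driver that actually walks the function is EarlyCSE::run: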
bool EarlyCSE::run() {
  // Note, deque is being used here because there is significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      AvailableGEPs, CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(new StackNode(
          AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
          AvailableGEPs, NodeToProcess->childGeneration(), child,
          child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  return Changed;
}
An initial StackNode for the root of the dominator tree is pushed onto nodesToProcess; the deque of StackNodes plays the role of the worklist in a data-flow analysis. While nodesToProcess is non-empty: if the node on top has not been processed yet, process it via processNode; if it has been processed but still has children left to visit, push the next child onto the stack; otherwise the node is finished, so it is deleted and popped off the stack.
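The reason a plain dominator-tree walk is sufficient is that the Available* maps are ScopedHashTables: entering a node opens a scope, and leaving it pops everything that node made available. A reduced sketch of that idea (written recursively for brevity and with made-up names; the real pass uses the explicit StackNode stack above precisely to avoid deep recursion):

#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Sketch: values recorded while visiting N stay visible in every block N
// dominates and are dropped automatically when the scope is destroyed.
static void walkDomTree(DomTreeNode *N,
                        ScopedHashTable<Value *, Value *> &Available) {
  ScopedHashTable<Value *, Value *>::ScopeTy Scope(Available);
  // ... visit the instructions of N->getBlock(), calling Available.insert()
  //     when an instruction defines a reusable value and Available.lookup()
  //     to find a dominating duplicate ...
  for (auto CI = N->begin(), CE = N->end(); CI != CE; ++CI)
    walkDomTree(*CI, Available);
}

processNode itself does the per-block work: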
bool EarlyCSE::processNode(DomTreeNode *Node) {
  // ... the body is long; it is walked through fragment by fragment below ...
  return Changed;
}
processNode is very long; its job is to process a single dominator-tree node. It first fetches the basic block belonging to the node and looks at how many predecessors the block has. If there is exactly one predecessor, that predecessor is the parent of this node in the dominator tree, and the parent's live-out memory values are still current here. Otherwise the pass is conservative: it increments CurrentGeneration, which invalidates all remembered memory values.
bool Changed = false;
BasicBlock *BB = Node->getBlock();
// If this block has a single predecessor, then the predecessor is the parent
// of the domtree node and all of the live out memory values are still current
// in this block. If this block has multiple predecessors, then they could
// have invalidated the live-out memory values of our parent value. For now,
// just be conservative and invalidate memory if this block has multiple
// predecessors.
if (!BB->getSinglePredecessor())
++CurrentGeneration;
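A small C sketch of why multiple predecessors force the generation bump (use() and merge_example are made-up names): a load remembered on one path is not valid in the merge block, because control may also arrive via a path that wrote to the location.

void use(int); /* hypothetical external function */

void merge_example(int *p, int cond) {
    int t = *p;   /* load recorded in AvailableLoads (old generation) */
    if (cond) {
        use(t);   /* single-predecessor region: on entry, *p is still t */
    } else {
        *p = 42;  /* this path clobbers the location */
    }
    use(*p);      /* merge block has two predecessors: CurrentGeneration is
                     bumped, so the earlier load is not forwarded here */
}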
The next piece of code handles the case where the single predecessor ends in a conditional branch: getCondition retrieves the instruction computing the branch condition, and handleBranchCondition records the value that condition must have on this path. After that, LastStore is declared to track the most recent non-volatile store.
// If this node has a single predecessor which ends in a conditional branch,
// we can infer the value of the branch condition given that we took this
// path. We need the single predecessor to ensure there's not another path
// which reaches this block where the condition might hold a different
// value. Since we're adding this to the scoped hash table (like any other
// def), it will have been popped if we encounter a future merge block.
if (BasicBlock *Pred = BB->getSinglePredecessor()) {
auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
if (BI && BI->isConditional()) {
auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
if (CondInst && SimpleValue::canHandle(CondInst))
Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
}
}
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there in no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
Instruction *LastStore = nullptr;
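What handleBranchCondition buys us, sketched in C (the function name is made up): inside a block that is only reachable when the branch condition held, re-evaluating the same condition folds to a constant.

int same_condition(int x, int y) {
    if (x == y) {
        /* handleBranchCondition records "x == y" -> true for this block, so
         * this second, identical comparison is CSE'd to the constant true. */
        if (x == y)
            return 1;
    }
    return 0;
}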
Next, each instruction in the block is examined to see whether it can be eliminated; if so it is removed, otherwise it is recorded in the available-value tables. isInstructionTriviallyDead checks whether the value produced by the instruction is used anywhere (and that the instruction has no side effects); if not, the instruction is dead, and a series of removal-related steps are performed (salvaging debug info, updating MemorySSA, erasing the instruction).
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (Instruction &Inst : make_early_inc_range(*BB)) {
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(&Inst, &TLI)) {
LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
salvageKnowledge(&Inst, &AC);
salvageDebugInfo(Inst);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
++NumSimplify;
continue;
}
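A minimal illustration of the trivially-dead case (in practice such values usually appear as leftovers of earlier folding rather than being written by hand; the function name is made up):

int dead_value(int a, int b) {
    int unused = a * b; /* no uses, no side effects: erased by the DCE above */
    return a + b;
}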
Next, several intrinsics are handled specially and otherwise skipped: assume (whose condition is recorded as true for all dominated blocks), noalias scope declarations, sideeffect, and pseudoprobe. The in-source comments explain why each can be ignored by CSE.
// Skip assume intrinsics, they don't really have side effects (although
// they're marked as such to ensure preservation of control dependencies),
// and this pass will not bother with its removal. However, we should mark
// its condition as true for all dominated blocks.
if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
if (CondI && SimpleValue::canHandle(CondI)) {
LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
<< '\n');
AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
} else
LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
continue;
}
// Likewise, noalias intrinsics don't actually write.
if (match(&Inst, m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
<< '\n');
continue;
}
// Skip sideeffect intrinsics, for the same reason as assume intrinsics.
if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
continue;
}
// Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
continue;
}
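The assume handling can be seen with clang's __builtin_assume (a sketch of the mechanism only; real pipelines may fold this even earlier, and the function name is made up): the assumed condition is entered into AvailableValues as true, so a dominated, identical comparison is replaced by that constant.

int assumed(int x) {
    __builtin_assume(x > 0);
    if (x > 0)      /* hashes equal to the assumed condition: folds to true */
        return 1;
    return 0;
}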
The next section deals with invariant memory regions: once invariant.start has executed for a location, that location may only be read, and any later store to it is undefined behaviour.
// We can skip all invariant.start intrinsics since they only read memory,
// and we can forward values across it. For invariant starts without
// invariant ends, we can use the fact that the invariantness never ends to
// start a scope in the current generaton which is true for all future
// generations. Also, we dont need to consume the last store since the
// semantics of invariant.start allow us to perform DSE of the last
// store, if there was a store following invariant.start. Consider:
//
// store 30, i8* p
// invariant.start(p)
// store 40, i8* p
// We can DSE the store to 30, since the store 40 to invariant location p
// causes undefined behaviour.
if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
// If there are any uses, the scope might end.
if (!Inst.use_empty())
continue;
MemoryLocation MemLoc =
MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
// Don't start a scope if we already have a better one pushed
if (!AvailableInvariants.count(MemLoc))
AvailableInvariants.insert(MemLoc, CurrentGeneration);
continue;
}
Next, guard intrinsics (llvm.experimental.guard) are handled: if the guarded condition is already known to be true, the guard itself is removed; otherwise the condition is recorded as true for all dominated blocks, and LastStore is cleared because guards read all memory.
if (isGuard(&Inst)) {
if (auto *CondI =
dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
if (SimpleValue::canHandle(CondI)) {
// Do we already know the actual value of this condition?
if (auto *KnownCond = AvailableValues.lookup(CondI)) {
// Is the condition known to be true?
if (isa<ConstantInt>(KnownCond) &&
cast<ConstantInt>(KnownCond)->isOne()) {
LLVM_DEBUG(dbgs()
<< "EarlyCSE removing guard: " << Inst << '\n');
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
continue;
} else
// Use the known value if it wasn't true.
cast<CallInst>(Inst).setArgOperand(0, KnownCond);
}
// The condition we're on guarding here is true for all dominated
// locations.
AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
}
}
// Guard intrinsics read all memory, but don't write any memory.
// Accordingly, don't update the generation but consume the last store (to
// avoid an incorrect DSE).
LastStore = nullptr;
continue;
}
Next come instructions that can be algebraically simplified. simplifyInstruction uses the SimplifyQuery to check whether the current instruction folds to a simpler value; if so, V is that value, all uses of the instruction are replaced with V, and the now-dead instruction is erased.
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
if (Value *V = simplifyInstruction(&Inst, SQ)) {
LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
<< '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
} else {
bool Killed = false;
if (!Inst.use_empty()) {
Inst.replaceAllUsesWith(V);
Changed = true;
}
if (isInstructionTriviallyDead(&Inst, &TLI)) {
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
Killed = true;
}
if (Changed)
++NumSimplify;
if (Killed)
continue;
}
}
// If this is a simple instruction that we can value number, process it.
if (SimpleValue::canHandle(&Inst)) {
if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
assert(CI->getExceptionBehavior() != fp::ebStrict &&
"Unexpected ebStrict from SimpleValue::canHandle()");
assert((!CI->getRoundingMode() ||
CI->getRoundingMode() != RoundingMode::Dynamic) &&
"Unexpected dynamic rounding from SimpleValue::canHandle()");
}
// See if the instruction has an available value. If so, use it.
if (Value *V = AvailableValues.lookup(&Inst)) {
LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
<< '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
combineIRFlags(Inst, V);
Inst.replaceAllUsesWith(V);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
++NumCSE;
continue;
}
// Otherwise, just remember that this value is available.
AvailableValues.insert(&Inst, &Inst);
continue;
}
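Going back to the simplification step, the kind of algebraic identity the comment refers to (X + 0 = X, X * 1 = X, and so on) looks like this in C (function name made up):

int simplified(int x) {
    int y = x + 0; /* simplifyInstruction: x + 0 == x, uses of y become x */
    int z = y * 1; /* likewise y * 1 == y */
    return z;      /* the whole function folds to "return x;" */
}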
Next come the memory-related instructions, starting with non-volatile loads.
ParseMemoryInst MemInst(&Inst, TTI);
// If this is a non-volatile load, process it.
if (MemInst.isValid() && MemInst.isLoad()) {
// (conservatively) we can't peak past the ordering implied by this
// operation, but we can add this load to our set of available values
if (MemInst.isVolatile() || !MemInst.isUnordered()) {
LastStore = nullptr;
++CurrentGeneration;
}
if (MemInst.isInvariantLoad()) {
// If we pass an invariant load, we know that memory location is
// indefinitely constant from the moment of first dereferenceability.
// We conservatively treat the invariant_load as that moment. If we
// pass a invariant load after already establishing a scope, don't
// restart it since we want to preserve the earliest point seen.
auto MemLoc = MemoryLocation::get(&Inst);
if (!AvailableInvariants.count(MemLoc))
AvailableInvariants.insert(MemLoc, CurrentGeneration);
}
// If we have an available version of this load, and if it is the right
// generation or the load is known to be from an invariant location,
// replace this instruction.
//
// If either the dominating load or the current load are invariant, then
// we can assume the current load loads the same value as the dominating
// load.
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
<< " to: " << *InVal.DefInst << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
if (InVal.IsLoad)
if (auto *I = dyn_cast<Instruction>(Op))
combineMetadataForCSE(I, &Inst, false);
if (!Inst.use_empty())
Inst.replaceAllUsesWith(Op);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
++NumCSELoad;
continue;
}
// Otherwise, remember that we have this instruction.
AvailableLoads.insert(MemInst.getPointerOperand(),
LoadValue(&Inst, CurrentGeneration,
MemInst.getMatchingId(),
MemInst.isAtomic(),
MemInst.isLoad()));
LastStore = nullptr;
continue;
}
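Load CSE through AvailableLoads, sketched in C (made-up function name): with no possibly-writing instruction in between, the second load is in the same generation as the first and is forwarded from it.

int two_loads(const int *p) {
    int a = *p;
    int b = *p; /* same pointer, same generation: replaced with a */
    return a + b;
}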
The remaining parts handle an assortment of cases: instructions that may read memory or throw (which clear LastStore), read-only calls, GEPs compared by constant offset, release fences, write-back dead-store elimination, and ordinary stores.
// If this instruction may read from memory or throw (and potentially read
// from memory in the exception handler), forget LastStore. Load/store
// intrinsics will indicate both a read and a write to memory. The target
// may override this (e.g. so that a store intrinsic does not read from
// memory, and thus will be treated the same as a regular store for
// commoning purposes).
if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
!(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;
// If this is a read-only call, process it.
if (CallValue::canHandle(&Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
if (InVal.first != nullptr &&
isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
&Inst)) {
LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
<< " to: " << *InVal.first << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
if (!Inst.use_empty())
Inst.replaceAllUsesWith(InVal.first);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
++NumCSECall;
continue;
}
// Otherwise, remember that we have this instruction.
AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
continue;
}
// Compare GEP instructions based on offset.
if (GEPValue::canHandle(&Inst)) {
auto *GEP = cast<GetElementPtrInst>(&Inst);
APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
? Offset.trySExtValue()
: std::nullopt);
if (Value *V = AvailableGEPs.lookup(GEPVal)) {
LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
<< '\n');
combineIRFlags(Inst, V);
Inst.replaceAllUsesWith(V);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
++NumCSEGEP;
continue;
}
// Otherwise, just remember that we have this GEP.
AvailableGEPs.insert(GEPVal, &Inst);
continue;
}
// A release fence requires that all stores complete before it, but does
// not prevent the reordering of following loads 'before' the fence. As a
// result, we don't need to consider it as writing to memory and don't need
// to advance the generation. We do need to prevent DSE across the fence,
// but that's handled above.
if (auto *FI = dyn_cast<FenceInst>(&Inst))
if (FI->getOrdering() == AtomicOrdering::Release) {
assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
continue;
}
// write back DSE - If we write back the same value we just loaded from
// the same location and haven't passed any intervening writes or ordering
// operations, we can remove the write. The primary benefit is in allowing
// the available load table to remain valid and value forward past where
// the store originally was.
if (MemInst.isValid() && MemInst.isStore()) {
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.DefInst &&
InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
// It is okay to have a LastStore to a different pointer here if MemorySSA
// tells us that the load and store are from the same memory generation.
// In that case, LastStore should keep its present value since we're
// removing the current store.
assert((!LastStore ||
ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
MemInst.getPointerOperand() ||
MSSA) &&
"can't have an intervening store if not using MemorySSA!");
LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
Inst.eraseFromParent();
Changed = true;
++NumDSE;
// We can avoid incrementing the generation count since we were able
// to eliminate this store.
continue;
}
}
// Okay, this isn't something we can CSE at all. Check to see if it is
// something that could modify memory. If so, our available memory values
// cannot be used so bump the generation count.
if (Inst.mayWriteToMemory()) {
++CurrentGeneration;
if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
if (LastStore) {
if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << Inst << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
} else {
salvageKnowledge(&Inst, &AC);
removeMSSA(*LastStore);
LastStore->eraseFromParent();
Changed = true;
++NumDSE;
LastStore = nullptr;
}
}
// fallthrough - we can exploit information about this store
}
// Okay, we just invalidated anything we knew about loaded values. Try
// to salvage *something* by remembering that the stored value is a live
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
AvailableLoads.insert(MemInst.getPointerOperand(),
LoadValue(&Inst, CurrentGeneration,
MemInst.getMatchingId(),
MemInst.isAtomic(),
MemInst.isLoad()));
// Remember that this was the last unordered store we saw for DSE. We
// don't yet handle DSE on ordered or volatile stores since we don't
// have a good way to model the ordering requirement for following
// passes once the store is removed. We could insert a fence, but
// since fences are slightly stronger than stores in their ordering,
// it's not clear this is a profitable transform. Another option would
// be to merge the ordering with that of the post dominating store.
if (MemInst.isUnordered() && !MemInst.isVolatile())
LastStore = &Inst;
else
LastStore = nullptr;
}
}
}
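Finally, the two flavours of dead-store elimination performed at the end of processNode, sketched in C (made-up function names; the quoted strings are the debug messages printed by the pass):

void overwritten(int *p) {
    *p = 1; /* overridden by the next store with no intervening load:
               removed as "EarlyCSE DEAD STORE" */
    *p = 2;
}

void write_back(int *p) {
    int t = *p;
    *p = t; /* stores back the value just loaded from the same location:
               removed as "EarlyCSE DSE (writeback)" */
}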