Created attachment 23719 [details] LLVM IR before Combine redundant instructions on function Error: >$ clang++ -c -O3 func.cpp fatal error: error in backend: Instruction Combining seems stuck in an infinite loop after 100 iterations. PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace, preprocessed source, and associated run script. Stack dump: 0. Program arguments: clang++ -c -O3 func.cpp 1. <eof> parser at end of file 2. Per-module optimization passes 3. Running pass 'CallGraph Pass Manager' on module 'func.cpp'. 4. Running pass 'Combine redundant instructions' on function '@_Z4testjxxhyPA23_A15_t' #0 0x0000561a024b48ce llvm::sys::PrintStackTrace(llvm::raw_ostream&) (/clang-11+0x23a58ce) #1 0x0000561a024b26a4 llvm::sys::RunSignalHandlers() (/clang-11+0x23a36a4) #2 0x0000561a024b2921 llvm::sys::CleanupOnSignal(unsigned long) (/clang-11+0x23a3921) #3 0x0000561a0241f7c3 llvm::CrashRecoveryContext::HandleExit(int) (/clang-11+0x23107c3) #4 0x0000561a024aaeeb llvm::sys::Process::Exit(int) (/clang-11+0x239beeb) #5 0x0000561a00d43951 LLVMErrorHandler(void*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool) (/clang-11+0xc34951) #6 0x0000561a02426f5c llvm::report_fatal_error(llvm::Twine const&, bool) (/clang-11+0x2317f5c) #7 0x0000561a01f963c9 combineInstructionsOverFunction(llvm::Function&, llvm::InstCombineWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::ProfileSummaryInfo*, unsigned int, llvm::LoopInfo*) (/clang-11+0x1e873c9) #8 0x0000561a01f97dd0 llvm::InstructionCombiningPass::runOnFunction(llvm::Function&) (/clang-11+0x1e88dd0) #9 0x0000561a01de8abc llvm::FPPassManager::runOnFunction(llvm::Function&) (/clang-11+0x1cd9abc) #10 0x0000561a05186f2f (anonymous namespace)::CGPassManager::runOnModule(llvm::Module&) (/clang-11+0x5077f2f) #11 0x0000561a01de9537 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/clang-11+0x1cda537) #12 0x0000561a027618b9 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::DataLayout const&, llvm::Module*, clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream> >) (/clang-11+0x26528b9) #13 0x0000561a0344e7b1 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (/clang-11+0x333f7b1) #14 0x0000561a041b3449 clang::ParseAST(clang::Sema&, bool, bool) (/clang-11+0x40a4449) #15 0x0000561a0344d2b8 clang::CodeGenAction::ExecuteAction() (/clang-11+0x333e2b8) #16 0x0000561a02d8c739 clang::FrontendAction::Execute() (/clang-11+0x2c7d739) #17 0x0000561a02d433b6 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (/clang-11+0x2c343b6) #18 0x0000561a02e60630 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (/clang-11+0x2d51630) #19 0x0000561a00d446b1 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (/clang-11+0xc356b1) #20 0x0000561a00d419e8 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&) (/clang-11+0xc329e8) #21 0x0000561a02c04cc9 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<llvm::Optional<llvm::StringRef> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, bool*) const::'lambda'()>(long) (/clang-11+0x2af5cc9) #22 0x0000561a0241f68c llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (/clang-11+0x231068c) #23 0x0000561a02c055e6 clang::driver::CC1Command::Execute(llvm::ArrayRef<llvm::Optional<llvm::StringRef> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, bool*) const (.part.0) (/clang-11+0x2af65e6) #24 0x0000561a02bdcc8c clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&) const (/clang-11+0x2acdc8c) #25 0x0000561a02bdd5c6 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*> >&) const (/clang-11+0x2ace5c6) #26 0x0000561a02be6809 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*> >&) (/clang-11+0x2ad7809) #27 0x0000561a00cc4209 main (/clang-11+0xbb5209) #28 0x00007f49c08e11e3 __libc_start_main /build/glibc-5mDdLG/glibc-2.30/csu/../csu/libc-start.c:342:3 #29 0x0000561a00d4153e _start (/clang-11+0xc3253e) clang-11: error: clang frontend command failed with exit code 70 (use -v to see invocation) clang version 11.0.0 (https://github.com/llvm/llvm-project.git 9ff310d5bfa56bb5f29645e2d2fee12115c3ddb7) Target: x86_64-unknown-linux-gnu Thread model: posix Reproducer: #include <algorithm> extern char var_30, var_48; extern int var_31; extern int arr_63[][10][10][10]; extern bool arr_74[]; extern int arr_75[]; void test(unsigned a, long long e, long long b, unsigned char c, unsigned long long d, unsigned short f[][23][15]) { for (short g = 0; g < c; g = 994) for (int h = 0; h < 2; h = 8) for (int i = 0; 0 < e;) { #pragma clang loop unroll(enable) for (int j = 0; j < 18; j++) for (int k = 0; k < 19LL; k += 1LL) { var_30 = f[g][h][0]; var_31 = b ? std::min((long long)0, e) : d; } for (int l = 0; l < 6; l += 2) arr_63[g][h][i][l] = std::max(d, std::min((unsigned long long)a, d)); for (int m = 0; m < 8; m++) for (char o = 0; o < 022; o += 4) arr_74[m] = arr_75[m] = char(3 + m); for (char n = 0; n < 5; n += 5) var_48 = 0; } } Clang version: 11.0.0 (https://github.com/llvm/llvm-project.git 9ff310d5bfa56bb5f29645e2d2fee12115c3ddb7)
Created attachment 23721 [details] mostly reduced input Hm, this one is practically identical to the #46661. The new problem here is that after we've just successfully did `mergeStoreIntoSuccessor()`, there are some instructions before the next store, that are not okay as per `IsNoopInstrForStoreMerging()` check there. But those instructions are sunk into next basic block by instcombine. So during next instcombine iterations, those instructions are no longer there and we manage to sink the store, too. I'm not sure how to approach this one.
Reverted thresholds again for now in 4500db8c59621a31c622862a2946457fdee481ce
@Roman: The correct way to fix this is to perform store merging as a backwards walk from an unconditional branch, instead of a forward walk from a store. Generally, InstCombine instruction scans should always go backwards.
(In reply to Nikita Popov from comment #3) > @Roman: The correct way to fix this is to perform store merging as a > backwards walk from an unconditional branch, instead of a forward walk from > a store. Generally, InstCombine instruction scans should always go backwards. IOW are you saying that this particular transform should be in visitBr() ? I'm not fully sure if that would be enough, or what worklist management would be needed, because we'll still have sinking issue, i think.
(In reply to Roman Lebedev from comment #4) > IOW are you saying that this particular transform should be in visitBr() ? That's right. > I'm not fully sure if that would be enough, or what worklist management > would be needed, because we'll still have sinking issue, i think. There shouldn't be any explicit management needed, just reporting a modification on the br should be enough for reprocessing (alternatively handle all stores in one loop, but likely iterator invalidation gets in the way there). Because InstCombine (approximately) folds instructions in instruction order, a backwards scan from the br will see already simplified and sunk instructions (approximately...)
(In reply to Nikita Popov from comment #5) > (In reply to Roman Lebedev from comment #4) > > IOW are you saying that this particular transform should be in visitBr() ? > > That's right. I see. > > I'm not fully sure if that would be enough, or what worklist management > > would be needed, because we'll still have sinking issue, i think. > > There shouldn't be any explicit management needed, just reporting a > modification on the br should be enough for reprocessing (alternatively > handle all stores in one loop, but likely iterator invalidation gets in the > way there). > Because InstCombine (approximately) folds instructions in > instruction order, a backwards scan from the br will see already simplified > and sunk instructions (approximately...) Nope, that't the thing. We have: a1 = a2 = store (a1, a2) b1 = b2 = store (b1, b2) br %label Even if we sink the store `store (b1, b2)` when visiting branch, we we didn't yet sunk the `b1` and `b2`, because they were used by `store (b1, b2)` that we didn't sink yet. So after sinking `store (b1, b2)`, we'd need to first sink `b1` and `b2`, and only then can we sink `store (a1, a2)`
(In reply to Roman Lebedev from comment #6) > Nope, that't the thing. > We have: > > a1 = > a2 = > store (a1, a2) > b1 = > b2 = > store (b1, b2) > br %label > > Even if we sink the store `store (b1, b2)` when visiting branch, > we we didn't yet sunk the `b1` and `b2`, because they were used by > `store (b1, b2)` that we didn't sink yet. > > So after sinking `store (b1, b2)`, we'd need to first sink `b1` and `b2`, > and only then can we sink `store (a1, a2)` I see, thanks. In that case I'd additionally try to add a Worklist.addValue(MergedVal) call at https://github.com/llvm/llvm-project/blob/master/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp#L1545 and a Worklist.addValue(SI.getOperand(0)) call in the if. I think that will process things in the right order (first the now potentially sinkable instructions, then the the branch), though would have to try to be sure.
(In reply to Roman Lebedev from comment #6) > (In reply to Nikita Popov from comment #5) > > (In reply to Roman Lebedev from comment #4) > > > IOW are you saying that this particular transform should be in visitBr() ? > > > > That's right. > I see. https://reviews.llvm.org/D83670
Actually, my explanation is not even full. Here's IR after mergeStoreIntoSuccessor() last time succeeds in iteration #1: IC: ADD: %storemerge1 = phi i8 [ %19, %13 ], [ %12, %6 ] IC: ADD: store i8 %storemerge1, i8* @c, align 1 IC: ERASE store i8 %19, i8* @c, align 1, !tbaa !8 IC: ADD DEFERRED: %19 = trunc i16 %18 to i8 IC: ERASE store i8 %12, i8* @c, align 1, !tbaa !8 IC: ADD DEFERRED: %12 = trunc i16 %11 to i8 ; Function Attrs: noreturn nounwind uwtable define dso_local void @_Z1gPs(i16* nocapture readonly %0) local_unnamed_addr #1 { %2 = load i64, i64* @d, align 8, !tbaa !2 %3 = icmp eq i64 %2, 0 %4 = load i64, i64* @a, align 8 %5 = icmp ne i64 %4, 0 br i1 %3, label %13, label %6 6: ; preds = %1 %7 = load i16, i16* %0, align 2, !tbaa !6 %8 = trunc i16 %7 to i8 store i8 %8, i8* @c, align 1, !tbaa !8 tail call void @llvm.assume(i1 %5) #3 %9 = load i16, i16* %0, align 2, !tbaa !6 %10 = trunc i16 %9 to i8 store i8 %10, i8* @c, align 1, !tbaa !8 %11 = load i16, i16* %0, align 2, !tbaa !6 %12 = trunc i16 %11 to i8 br label %20 13: ; preds = %1 %14 = load i16, i16* %0, align 2, !tbaa !6 %15 = trunc i16 %14 to i8 store i8 %15, i8* @c, align 1, !tbaa !8 %16 = load i16, i16* %0, align 2, !tbaa !6 %17 = trunc i16 %16 to i8 store i8 %17, i8* @c, align 1, !tbaa !8 %18 = load i16, i16* %0, align 2, !tbaa !6 %19 = trunc i16 %18 to i8 br label %20 20: ; preds = %13, %6 %storemerge1 = phi i8 [ %19, %13 ], [ %12, %6 ] store i8 %storemerge1, i8* @c, align 1, !tbaa !8 %storemerge.in = load i16, i16* %0, align 2, !tbaa !6 %storemerge = trunc i16 %storemerge.in to i8 store i8 %storemerge, i8* @c, align 1, !tbaa !8 br label %21 21: ; preds = %21, %20 br label %21 } IC: ADD DEFERRED: br label %20 IC: ADD: br label %20 IC: ADD: %12 = trunc i16 %11 to i8 IC: ADD: %19 = trunc i16 %18 to i8 IC: Visiting: %19 = trunc i16 %18 to i8 IC: Visiting: %12 = trunc i16 %11 to i8 IC: Visiting: br label %20 IC: Visiting: store i8 %storemerge1, i8* @c, align 1, !tbaa !8 IC: Visiting: %storemerge1 = phi i8 [ %19, %13 ], [ %12, %6 ] IC: ADD: %storemerge1.in = phi i16 [ %18, %13 ], [ %11, %6 ] IC: Old = %storemerge1 = phi i8 [ %19, %13 ], [ %12, %6 ] New = <badref> = trunc i16 %storemerge1.in to i8 IC: ADD: store i8 %storemerge1, i8* @c, align 1, !tbaa !8 IC: ADD: %storemerge1 = trunc i16 %storemerge1.in to i8 IC: ERASE %21 = phi i8 [ %19, %13 ], [ %12, %6 ] IC: ADD DEFERRED: %19 = trunc i16 %18 to i8 IC: ADD DEFERRED: %12 = trunc i16 %11 to i8 IC: ERASE %12 = trunc i16 %11 to i8 IC: ADD DEFERRED: %11 = load i16, i16* %0, align 2, !tbaa !6 IC: ADD: %11 = load i16, i16* %0, align 2, !tbaa !6 IC: ERASE %18 = trunc i16 %17 to i8 IC: ADD DEFERRED: %17 = load i16, i16* %0, align 2, !tbaa !6 IC: ADD: %17 = load i16, i16* %0, align 2, !tbaa !6 IC: Visiting: %17 = load i16, i16* %0, align 2, !tbaa !6 IC: Visiting: %11 = load i16, i16* %0, align 2, !tbaa !6 IC: Visiting: %storemerge1 = trunc i16 %storemerge1.in to i8 IC: Visiting: store i8 %storemerge1, i8* @c, align 1, !tbaa !8 IC: Visiting: %storemerge1.in = phi i16 [ %17, %12 ], [ %11, %6 ] IC: Old = %storemerge1.in = phi i16 [ %17, %12 ], [ %11, %6 ] New = <badref> = load i16, i16* %0, align 2, !tbaa <0x21e1798> IC: ADD: %storemerge1 = trunc i16 %storemerge1.in to i8 IC: ADD: %storemerge1.in = load i16, i16* %0, align 2, !tbaa !6 IC: ERASE %19 = phi i16 [ %17, %12 ], [ %11, %6 ] IC: ADD DEFERRED: %17 = load i16, i16* %0, align 2, !tbaa !6 IC: ADD DEFERRED: %11 = load i16, i16* %0, align 2, !tbaa !6 IC: ERASE %11 = load i16, i16* %0, align 2, !tbaa !6 IC: ERASE %16 = load i16, i16* %0, align 2, !tbaa !6 IC: Visiting: %storemerge1.in = load i16, i16* %0, align 2, !tbaa !6 IC: Visiting: %storemerge1 = trunc i16 %storemerge1.in to i8 The problem isn't that we don't sink operands of the sunked store, it's that we can't sink them because what we need to happen is for us to visit the PHI node, that will "sink" the truncs, then we can sink the load, and only then should we revisit the branch..
Fixed by https://reviews.llvm.org/D84109.
(Still needs to be cherry-picked, no?)
(In reply to Roman Lebedev from comment #11) > (Still needs to be cherry-picked, no?) Let's have it bake in trunk for another day, then I'll cherry-pick it. Please let me know if there are any issues.
Pushed to 11.x as eb3c5db40a1450d50c387f3a42f4c095001220cb