One of our random test-generators found a miscompile bug in an AVX test-case. Bisecting identifies that the problem first appeared at r360526 (although it isn't clear to me whether that commit has a bug, or is exposing a latent bug -- my guess is it's exposing a problem). I verified that the problem still appears with a fairly modern version -- checked r370900. Below is a somewhat reduced source-level repro. $ cat test.cpp // ========================================================================== extern "C" int printf(const char *, ...); typedef unsigned char uchar; typedef double __attribute__((ext_vector_type(4))) double4; typedef double __m256d __attribute__((__vector_size__(32), __aligned__(32))); typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); typedef double __v2df __attribute__ ((__vector_size__ (16))); typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))); static void init(unsigned char pred, volatile void *data, unsigned size) { unsigned char *bytes = (unsigned char *)data; for (unsigned i = 0; i != size; ++i) bytes[i] = pred + i; } int main() { union { __m256d x; char c[32]; } zero_union; __m256d id6595; init(212, &id6595, sizeof(id6595)); __m256d id6597; init(106, &id6597, sizeof(id6597)); __m128d id6599; init(239, &id6599, sizeof(id6599)); __m256d id1725 = __builtin_shufflevector((__v2df)id6599, (__v2df)id6599, 0, 1, -1, -1); zero_union.x = id1725; for (unsigned i = 16; i != 32; ++i) zero_union.c[i] = 0; __m256d id6598 = zero_union.x; __m256d id6596 = (__m256d) ((__v4df)id6597+(__v4df)id6598); __m256d id6594 = __builtin_shufflevector((__v4df)id6595, (__v4df)id6596, 1, 5, 1+2, 5+2); volatile __m256d id6600; init(176, &id6600, sizeof(id6600)); volatile __m256d id6605; init(65, &id6605, sizeof(id6605)); __m256d id6606; init(66, &id6606, sizeof(id6606)); __m256d id6604 = (__m256d) (~(__v4du)id6605 & (__v4du)id6606); volatile __m256d id6609; 
init(168, &id6609, sizeof(id6609)); __attribute__((aligned(32))) uchar id6613; init(124, &id6613, sizeof(id6613)); __m256d id6610; init(217, &id6610, sizeof(id6610)); for (uchar id6611_idx = 0; (id6611_idx < id6613); ++id6611_idx) { volatile __m256d id6612; init(93, &id6612, sizeof(id6612)); id6610 *= id6612; } __m256d id6608 = __builtin_shufflevector((__v4df)id6609, (__v4df)id6610, 1, 5, 1+2, 5+2); static volatile __attribute__((aligned(2)))__m256d id6614 = __extension__ (__m256d){ 0, 0, 0, 0 }; __m256d id6607 = (__m256d)__builtin_ia32_haddpd256((__v4df)id6608, (__v4df)id6614); __m256d id6603 = (__m256d)__builtin_ia32_hsubpd256((__v4df)id6604, (__v4df)id6607); volatile __m256d id6602 = __builtin_shufflevector((__v4df)id6603, (__v4df)id6603, 0, 0, 2, 2); volatile __attribute__((aligned(32)))__m256d id6601 = (__m256d)__builtin_ia32_sqrtpd256((__v4df)id6602); __m256d id6593 = (__m256d)__builtin_ia32_blendvpd256( (__v4df)id6594, (__v4df)id6600, (__v4df)id6601); double4 id6592 = (static_cast<double4>(id6593)); printf("id6592:%lf %lf %lf %lf\n",id6592[0],id6592[1],id6592[2],id6592[3]); return 0; } // ========================================================================== $ ~/llvm.8.0/bin/clang++ -mavx -O0 -o test.elf test.cpp $ test.elf | tee llvm8.txt id6592:-0.000000 -0.120083 -60539778500895675077088930481106321408.000000 -27866005112515140108039219831221433722840879450495184416280192272475975843840.000000 $ ~/llvm.8.0/bin/clang++ -mavx -O2 -o test.elf test.cpp $ test.elf | diff llvm8.txt - $ r360526/bin/clang++ --version | egrep 'version'\|'Target' clang version 9.0.0 (trunk 360526) Target: x86_64-unknown-linux-gnu $ r360526/bin/clang++ -mavx -O0 -o test.elf test.cpp $ test.elf | diff llvm8.txt - $ r360526/bin/clang++ -mavx -O2 -o test.elf test.cpp $ test.elf | diff llvm8.txt - 1c1 < id6592:-0.000000 -0.120083 -60539778500895675077088930481106321408.000000 -27866005112515140108039219831221433722840879450495184416280192272475975843840.000000 --- > id6592:-0.000000 
-5141221720570257117792454033359263003807236078306659704678783633844346063763670599376596080887364908982157086903115200913387331142513248047145337233885905544954992509425426830156252708146824920980885521333040555469700954412883838074547909394193152829277777742490970125863264566428463469688215073724563456.000000 -60539778500895675077088930481106321408.000000 -27866005112515140108039219831221433722840879450495184416280192272475975843840.000000 $
That was a latent bug, although I'm not sure how to expose it stand-alone. Reduced test added here: https://reviews.llvm.org/rL371088 Fix coming up...
I can try harder to reduce the test and/or add more extensive tests, but I wanted to get the code fix in ASAP: https://reviews.llvm.org/rL371095
Thanks for the quick response Sanjay! Merging this fix into 9.0 seems sensible to me. @craig.topper: what do you think?
Fixing for 9.0 sounds good to me.
(In reply to Warren Ristow from comment #3) > Thanks for the quick response Sanjay! Credit to Simon for noticing that the problem was in h-op generation. Sorry about the bug.
Merged to release_90 in r371178.