Merge "[patches] Cherry pick CLS for: Fix LLD crash" into main
diff --git a/patches/PATCHES.json b/patches/PATCHES.json
index 47b2623..45c6353 100644
--- a/patches/PATCHES.json
+++ b/patches/PATCHES.json
@@ -731,6 +731,20 @@
     {
         "metadata": {
             "info": [],
+            "title": "[UPSTREAM] [SLP]Fix a crash for reduced values with minbitwidth, which are reused."
+        },
+        "platforms": [
+            "android"
+        ],
+        "rel_patch_path": "cherry/39b2104b4a4e0990eddc763eab99b28e8deab953.patch",
+        "version_range": {
+            "from": 522817,
+            "until": 523953
+        }
+    },
+    {
+        "metadata": {
+            "info": [],
             "title": "[UPSTREAM] [libunwind][WebAssembly] Fix libunwind.cpp guard (#78230)"
         },
         "platforms": [
diff --git a/patches/cherry/39b2104b4a4e0990eddc763eab99b28e8deab953.patch b/patches/cherry/39b2104b4a4e0990eddc763eab99b28e8deab953.patch
new file mode 100644
index 0000000..817a9f2
--- /dev/null
+++ b/patches/cherry/39b2104b4a4e0990eddc763eab99b28e8deab953.patch
@@ -0,0 +1,78 @@
+From 39b2104b4a4e0990eddc763eab99b28e8deab953 Mon Sep 17 00:00:00 2001
+From: Alexey Bataev <a.bataev@outlook.com>
+Date: Fri, 12 Jan 2024 04:32:04 -0800
+Subject: [PATCH] [SLP]Fix a crash for reduced values with minbitwidth, which
+ are reused.
+
+If the reduced values are additionally affected by minbitwidth analysis,
+they need to be cast to the proper type before doing any math on them,
+if they are reused.
+---
+ .../Transforms/Vectorize/SLPVectorizer.cpp    | 13 ++++++++
+ .../X86/reused-reductions-with-minbitwidth.ll | 30 +++++++++++++++++++
+ 2 files changed, 43 insertions(+)
+ create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll
+
+diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+index 055fbb00871f..66a3c257a76f 100644
+--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+@@ -15214,6 +15214,19 @@ private:
+     assert(IsSupportedHorRdxIdentityOp &&
+            "The optimization of matched scalar identity horizontal reductions "
+            "must be supported.");
++    auto *VTy = cast<FixedVectorType>(VectorizedValue->getType());
++    if (VTy->getElementType() != VL.front()->getType()) {
++      VectorizedValue = Builder.CreateIntCast(
++          VectorizedValue,
++          FixedVectorType::get(VL.front()->getType(), VTy->getNumElements()),
++          any_of(VL, [&](Value *R) {
++            KnownBits Known = computeKnownBits(
++                R, cast<Instruction>(ReductionOps.front().front())
++                       ->getModule()
++                       ->getDataLayout());
++            return !Known.isNonNegative();
++          }));
++    }
+     switch (RdxKind) {
+     case RecurKind::Add: {
+       // root = mul prev_root, <1, 1, n, 1>
+diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll
+new file mode 100644
+index 000000000000..5d22b5a4873b
+--- /dev/null
++++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-reductions-with-minbitwidth.ll
+@@ -0,0 +1,30 @@
++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
++; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mcpu=skylake < %s | FileCheck %s
++
++define i1 @test(i1 %cmp5.not.31) {
++; CHECK-LABEL: define i1 @test(
++; CHECK-SAME: i1 [[CMP5_NOT_31:%.*]]) #[[ATTR0:[0-9]+]] {
++; CHECK-NEXT:  entry:
++; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i1> <i1 poison, i1 false, i1 false, i1 false>, i1 [[CMP5_NOT_31]], i32 0
++; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
++; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i1>
++; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32>
++; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], <i32 2, i32 1, i32 1, i32 1>
++; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
++; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 0
++; CHECK-NEXT:    [[CMP_NOT_I_I:%.*]] = icmp eq i32 [[TMP6]], 0
++; CHECK-NEXT:    ret i1 [[CMP_NOT_I_I]]
++;
++entry:
++  %add7.31 = select i1 %cmp5.not.31, i32 0, i32 0
++  %add18 = select i1 false, i32 0, i32 0
++  %add19 = add i32 %add18, %add7.31
++  %add18.1 = select i1 false, i32 0, i32 0
++  %add19.1 = add i32 %add18.1, %add19
++  %add18.4 = select i1 false, i32 0, i32 0
++  %add19.4 = add i32 %add18.4, %add19.1
++  %add19.31 = add i32 %add7.31, %add19.4
++  %0 = and i32 %add19.31, 0
++  %cmp.not.i.i = icmp eq i32 %0, 0
++  ret i1 %cmp.not.i.i
++}
+-- 
+2.44.0.769.g3c40516874-goog
+
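
For reviewers, here is a minimal sketch of what the new hunk in
SLPVectorizer.cpp does, rewritten as a standalone helper. This is an
illustration only: the helper name widenReducedValue is hypothetical,
and the real code runs inside the vectorizer with its own Builder, VL,
and ReductionOps state (from which it fetches the DataLayout). It
assumes LLVM's public C++ API (IRBuilder::CreateIntCast,
computeKnownBits, FixedVectorType::get).

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // If minbitwidth analysis demoted the reduced values (e.g. to <4 x i1>
    // while the scalars are i32), cast the vector back to the scalar type
    // before emitting any reduction math on it.
    static Value *widenReducedValue(IRBuilder<> &Builder,
                                    Value *VectorizedValue,
                                    ArrayRef<Value *> VL,
                                    const DataLayout &DL) {
      auto *VTy = cast<FixedVectorType>(VectorizedValue->getType());
      Type *ScalarTy = VL.front()->getType();
      if (VTy->getElementType() == ScalarTy)
        return VectorizedValue; // Types already match; nothing to widen.
      // Sign-extend only if some scalar may be negative; when every value
      // is provably non-negative, zero- and sign-extension agree, so the
      // unsigned cast is safe.
      bool IsSigned = any_of(VL, [&](Value *R) {
        return !computeKnownBits(R, DL).isNonNegative();
      });
      return Builder.CreateIntCast(
          VectorizedValue,
          FixedVectorType::get(ScalarTy, VTy->getNumElements()), IsSigned);
    }

The test above exercises exactly this path: the selects are vectorized
as <4 x i1> values, and the reduction needs them back as <4 x i32>
(the trunc/zext pair in the CHECK lines) before multiplying by the
reuse counts <2, 1, 1, 1> and calling llvm.vector.reduce.add.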