[AArch64] Fold swapped sub/SUBS conditions #121412

davemgreen · 2024-12-31T19:07:25Z

This fold already exists in a couple of places (DAG and CGP), where an icmp's operands are swapped to allow CSE with a sub. Those versions do not handle constants, though. This patch adds an AArch64 version that can be more precise.

llvmbot · 2024-12-31T19:07:55Z

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

This fold already exists in a couple of places (DAG and CGP), where an icmp's operands are swapped to allow CSE with a sub. Those versions do not handle constants, though. This patch adds an AArch64 version that can be more precise.

Full diff: https://github.com/llvm/llvm-project/pull/121412.diff

3 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+24)
(modified) llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h (+30)
(modified) llvm/test/CodeGen/AArch64/csel-subs-swapped.ll (+20-32)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 24e1ebd8421fbf..970250d8fb766f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24854,6 +24854,30 @@ static SDValue performCSELCombine(SDNode *N,
   if (SDValue Folded = foldCSELofCTTZ(N, DAG))
 		return Folded;
 
+  // CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
+  // if SUB(y, x) already exists and we can produce a swapped predicate for cc.
+  SDValue Cond = N->getOperand(3);
+  if (DCI.isAfterLegalizeDAG() && Cond.getOpcode() == AArch64ISD::SUBS &&
+      Cond.hasOneUse() && Cond->hasNUsesOfValue(0, 0) &&
+      DAG.doesNodeExist(ISD::SUB, N->getVTList(),
+                        {Cond.getOperand(1), Cond.getOperand(0)}) &&
+      !DAG.doesNodeExist(ISD::SUB, N->getVTList(),
+                         {Cond.getOperand(0), Cond.getOperand(1)}) &&
+      !isNullConstant(Cond.getOperand(1))) {
+    AArch64CC::CondCode OldCond =
+        static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
+    AArch64CC::CondCode NewCond = getSwappedCondition(OldCond);
+    if (NewCond != AArch64CC::AL) {
+      SDLoc DL(N);
+      SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
+                                Cond.getOperand(1), Cond.getOperand(0));
+      return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
+                         N->getOperand(1),
+                         DAG.getConstant(NewCond, DL, MVT::i32),
+                         Sub.getValue(1));
+    }
+  }
+
   return performCONDCombine(N, DCI, DAG, 2, 3);
 }
 
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index e0ccba4d6a59e8..39d83e61705f00 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -306,6 +306,36 @@ inline static CondCode getInvertedCondCode(CondCode Code) {
   return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
 }
 
+/// getSwappedCondition - assuming the flags are set by MI(a,b), return the
+/// condition code to use once the instructions are modified so that the flags
+/// are set by MI(b,a). Returns AL for any condition code not handled below.
+inline static CondCode getSwappedCondition(CondCode CC) {
+  switch (CC) {
+  default:
+    return AL; // No swapped form is provided for the remaining codes.
+  case EQ:
+    return EQ; // Maps to itself.
+  case NE:
+    return NE; // Maps to itself.
+  case HS:
+    return LS;
+  case LO:
+    return HI;
+  case HI:
+    return LO;
+  case LS:
+    return HS;
+  case GE:
+    return LE;
+  case LT:
+    return GT;
+  case GT:
+    return LT;
+  case LE:
+    return GE;
+  }
+}
+
 /// Given a condition code, return NZCV flags that would satisfy that condition.
 /// The flag bits are in the format expected by the ccmp instructions.
 /// Note that many different flag settings can satisfy a given condition code,
diff --git a/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll b/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
index 7c628cf1683d6f..3971da27cdddca 100644
--- a/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
+++ b/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
@@ -5,8 +5,7 @@ define i32 @eq_i32(i32 %x) {
 ; CHECK-LABEL: eq_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT:    cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT:    sub w8, w8, w0
+; CHECK-NEXT:    subs w8, w8, w0
 ; CHECK-NEXT:    csel w0, w0, w8, eq
 ; CHECK-NEXT:    ret
   %cmp = icmp eq i32 %x, -2097152
@@ -19,8 +18,7 @@ define i32 @ne_i32(i32 %x) {
 ; CHECK-LABEL: ne_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT:    cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT:    sub w8, w8, w0
+; CHECK-NEXT:    subs w8, w8, w0
 ; CHECK-NEXT:    csel w0, w0, w8, ne
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i32 %x, -2097152
@@ -33,9 +31,8 @@ define i32 @sgt_i32(i32 %x) {
 ; CHECK-LABEL: sgt_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT:    cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    csel w0, w0, w8, gt
+; CHECK-NEXT:    subs w8, w8, w0
+; CHECK-NEXT:    csel w0, w0, w8, lt
 ; CHECK-NEXT:    ret
   %cmp = icmp sgt i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -62,9 +59,8 @@ define i32 @slt_i32(i32 %x) {
 ; CHECK-LABEL: slt_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT:    cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    csel w0, w0, w8, lt
+; CHECK-NEXT:    subs w8, w8, w0
+; CHECK-NEXT:    csel w0, w0, w8, gt
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -91,9 +87,8 @@ define i32 @ugt_i32(i32 %x) {
 ; CHECK-LABEL: ugt_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT:    cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    csel w0, w0, w8, hi
+; CHECK-NEXT:    subs w8, w8, w0
+; CHECK-NEXT:    csel w0, w0, w8, lo
 ; CHECK-NEXT:    ret
   %cmp = icmp ugt i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -120,9 +115,8 @@ define i32 @ult_i32(i32 %x) {
 ; CHECK-LABEL: ult_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT:    cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    csel w0, w0, w8, lo
+; CHECK-NEXT:    subs w8, w8, w0
+; CHECK-NEXT:    csel w0, w0, w8, hi
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -150,8 +144,7 @@ define i64 @eq_i64(i64 %x) {
 ; CHECK-LABEL: eq_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #100 // =0x64
-; CHECK-NEXT:    cmp x0, #100
-; CHECK-NEXT:    sub x8, x8, x0
+; CHECK-NEXT:    subs x8, x8, x0
 ; CHECK-NEXT:    csel x0, x0, x8, eq
 ; CHECK-NEXT:    ret
   %cmp = icmp eq i64 %x, 100
@@ -164,8 +157,7 @@ define i64 @ne_i64(i64 %x) {
 ; CHECK-LABEL: ne_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #100 // =0x64
-; CHECK-NEXT:    cmp x0, #100
-; CHECK-NEXT:    sub x8, x8, x0
+; CHECK-NEXT:    subs x8, x8, x0
 ; CHECK-NEXT:    csel x0, x0, x8, ne
 ; CHECK-NEXT:    ret
   %cmp = icmp ne i64 %x, 100
@@ -178,9 +170,8 @@ define i64 @sgt_i64(i64 %x) {
 ; CHECK-LABEL: sgt_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #100 // =0x64
-; CHECK-NEXT:    cmp x0, #100
-; CHECK-NEXT:    sub x8, x8, x0
-; CHECK-NEXT:    csel x0, x0, x8, gt
+; CHECK-NEXT:    subs x8, x8, x0
+; CHECK-NEXT:    csel x0, x0, x8, lt
 ; CHECK-NEXT:    ret
   %cmp = icmp sgt i64 %x, 100
   %sub = sub i64 100, %x
@@ -206,9 +197,8 @@ define i64 @slt_i64(i64 %x) {
 ; CHECK-LABEL: slt_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #100 // =0x64
-; CHECK-NEXT:    cmp x0, #100
-; CHECK-NEXT:    sub x8, x8, x0
-; CHECK-NEXT:    csel x0, x0, x8, lt
+; CHECK-NEXT:    subs x8, x8, x0
+; CHECK-NEXT:    csel x0, x0, x8, gt
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i64 %x, 100
   %sub = sub i64 100, %x
@@ -234,9 +224,8 @@ define i64 @ugt_i64(i64 %x) {
 ; CHECK-LABEL: ugt_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #100 // =0x64
-; CHECK-NEXT:    cmp x0, #100
-; CHECK-NEXT:    sub x8, x8, x0
-; CHECK-NEXT:    csel x0, x0, x8, hi
+; CHECK-NEXT:    subs x8, x8, x0
+; CHECK-NEXT:    csel x0, x0, x8, lo
 ; CHECK-NEXT:    ret
   %cmp = icmp ugt i64 %x, 100
   %sub = sub i64 100, %x
@@ -262,9 +251,8 @@ define i64 @ult_i64(i64 %x) {
 ; CHECK-LABEL: ult_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #100 // =0x64
-; CHECK-NEXT:    cmp x0, #100
-; CHECK-NEXT:    sub x8, x8, x0
-; CHECK-NEXT:    csel x0, x0, x8, lo
+; CHECK-NEXT:    subs x8, x8, x0
+; CHECK-NEXT:    csel x0, x0, x8, hi
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i64 %x, 100
   %sub = sub i64 100, %x

[AArch64] Fold swapped sub/SUBS conditions

4bd0e47

This fold already exists in a couple of places (DAG and CGP), where an icmp's operands are swapped to allow CSE with a sub. Those versions do not handle constants, though. This patch adds an AArch64 version that can be more precise.

davemgreen requested review from SamTebbs33, nasherm and sjoerdmeijer December 31, 2024 19:07

llvmbot added the backend:AArch64 label Dec 31, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64] Fold swapped sub/SUBS conditions #121412

[AArch64] Fold swapped sub/SUBS conditions #121412

davemgreen commented Dec 31, 2024

llvmbot commented Dec 31, 2024

[AArch64] Fold swapped sub/SUBS conditions #121412

Are you sure you want to change the base?

[AArch64] Fold swapped sub/SUBS conditions #121412

Conversation

davemgreen commented Dec 31, 2024

llvmbot commented Dec 31, 2024