Skip to content

Commit e8bf0f7

Browse files
committed
[InstCombine] canonicalize more unsigned saturated add with 'not'
Yet another pattern variation suggested by:
https://bugs.llvm.org/show_bug.cgi?id=14613

There are 8 more potential commuted patterns here on top of the 8 that were
already handled (rL354221, rL354276, rL354393).

We have the obvious commute of the 'add' + commute of the cmp
predicate/operands (ugt/ult) + commute of the select operands:

  Name: base
  %notx = xor i32 %x, -1
  %a = add i32 %notx, %y
  %c = icmp ult i32 %x, %y
  %r = select i1 %c, i32 -1, i32 %a
  =>
  %c2 = icmp ult i32 %a, %y
  %r = select i1 %c2, i32 -1, i32 %a

  Name: ugt
  %notx = xor i32 %x, -1
  %a = add i32 %notx, %y
  %c = icmp ugt i32 %y, %x
  %r = select i1 %c, i32 -1, i32 %a
  =>
  %c2 = icmp ult i32 %a, %y
  %r = select i1 %c2, i32 -1, i32 %a

  Name: commute select
  %notx = xor i32 %x, -1
  %a = add i32 %notx, %y
  %c = icmp ult i32 %y, %x
  %r = select i1 %c, i32 %a, i32 -1
  =>
  %c2 = icmp ult i32 %a, %y
  %r = select i1 %c2, i32 -1, i32 %a

  Name: ugt + commute select
  %notx = xor i32 %x, -1
  %a = add i32 %notx, %y
  %c = icmp ugt i32 %x, %y
  %r = select i1 %c, i32 %a, i32 -1
  =>
  %c2 = icmp ult i32 %a, %y
  %r = select i1 %c2, i32 -1, i32 %a

https://rise4fun.com/Alive/den

llvm-svn: 354887
1 parent c9af54b commit e8bf0f7

File tree

2 files changed

+35
-24
lines changed

2 files changed

+35
-24
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,17 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
725725
Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
726726
return Builder.CreateSelect(NewCmp, TVal, FVal);
727727
}
728+
// The 'not' op may be included in the sum but not the compare.
729+
X = Cmp0;
730+
Y = Cmp1;
731+
if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
732+
// Change the comparison to use the sum (false value of the select). That is
733+
// a canonical pattern match form for uadd.with.overflow:
734+
// (X u< Y) ? -1 : (~X + Y) --> ((~X + Y) u< Y) ? -1 : (~X + Y)
735+
// (X u< Y) ? -1 : (Y + ~X) --> ((Y + ~X) u< Y) ? -1 : (Y + ~X)
736+
Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
737+
return Builder.CreateSelect(NewCmp, TVal, FVal);
738+
}
728739

729740
return nullptr;
730741
}

llvm/test/Transforms/InstCombine/saturating-add-sub.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -803,9 +803,9 @@ define i32 @uadd_sat_not(i32 %x, i32 %y) {
803803
; CHECK-LABEL: @uadd_sat_not(
804804
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
805805
; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
806-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X]], [[Y]]
807-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
808-
; CHECK-NEXT: ret i32 [[R]]
806+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
807+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
808+
; CHECK-NEXT: ret i32 [[TMP2]]
809809
;
810810
%notx = xor i32 %x, -1
811811
%a = add i32 %notx, %y
@@ -820,9 +820,9 @@ define i32 @uadd_sat_not_commute_add(i32 %xp, i32 %yp) {
820820
; CHECK-NEXT: [[Y:%.*]] = urem i32 42, [[YP:%.*]]
821821
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1
822822
; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[Y]], [[NOTX]]
823-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X]], [[Y]]
824-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
825-
; CHECK-NEXT: ret i32 [[R]]
823+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
824+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
825+
; CHECK-NEXT: ret i32 [[TMP2]]
826826
;
827827
%x = srem i32 42, %xp ; thwart complexity-based-canonicalization
828828
%y = urem i32 42, %yp ; thwart complexity-based-canonicalization
@@ -837,9 +837,9 @@ define i32 @uadd_sat_not_ugt(i32 %x, i32 %y) {
837837
; CHECK-LABEL: @uadd_sat_not_ugt(
838838
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
839839
; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
840-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[Y]], [[X]]
841-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
842-
; CHECK-NEXT: ret i32 [[R]]
840+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
841+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
842+
; CHECK-NEXT: ret i32 [[TMP2]]
843843
;
844844
%notx = xor i32 %x, -1
845845
%a = add i32 %notx, %y
@@ -853,9 +853,9 @@ define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) {
853853
; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
854854
; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
855855
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
856-
; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[Y]], [[X]]
857-
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
858-
; CHECK-NEXT: ret <2 x i32> [[R]]
856+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
857+
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
858+
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
859859
;
860860
%y = sdiv <2 x i32> %yp, <i32 2442, i32 4242> ; thwart complexity-based-canonicalization
861861
%notx = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -869,9 +869,9 @@ define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) {
869869
; CHECK-LABEL: @uadd_sat_not_commute_select(
870870
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
871871
; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
872-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[X]]
873-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
874-
; CHECK-NEXT: ret i32 [[R]]
872+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
873+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
874+
; CHECK-NEXT: ret i32 [[TMP2]]
875875
;
876876
%notx = xor i32 %x, -1
877877
%a = add i32 %notx, %y
@@ -885,9 +885,9 @@ define i32 @uadd_sat_not_commute_select_commute_add(i32 %x, i32 %yp) {
885885
; CHECK-NEXT: [[Y:%.*]] = sdiv i32 42, [[YP:%.*]]
886886
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
887887
; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[NOTX]]
888-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[X]]
889-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
890-
; CHECK-NEXT: ret i32 [[R]]
888+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
889+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
890+
; CHECK-NEXT: ret i32 [[TMP2]]
891891
;
892892
%y = sdiv i32 42, %yp ; thwart complexity-based-canonicalization
893893
%notx = xor i32 %x, -1
@@ -903,9 +903,9 @@ define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp)
903903
; CHECK-NEXT: [[Y:%.*]] = srem <2 x i32> <i32 12, i32 412>, [[YP:%.*]]
904904
; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
905905
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
906-
; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[X]], [[Y]]
907-
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A]], <2 x i32> <i32 -1, i32 -1>
908-
; CHECK-NEXT: ret <2 x i32> [[R]]
906+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
907+
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
908+
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
909909
;
910910
%x = urem <2 x i32> <i32 42, i32 -42>, %xp ; thwart complexity-based-canonicalization
911911
%y = srem <2 x i32> <i32 12, i32 412>, %yp ; thwart complexity-based-canonicalization
@@ -920,9 +920,9 @@ define i32 @uadd_sat_not_commute_select_ugt_commute_add(i32 %x, i32 %y) {
920920
; CHECK-LABEL: @uadd_sat_not_commute_select_ugt_commute_add(
921921
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
922922
; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
923-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], [[Y]]
924-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
925-
; CHECK-NEXT: ret i32 [[R]]
923+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
924+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
925+
; CHECK-NEXT: ret i32 [[TMP2]]
926926
;
927927
%notx = xor i32 %x, -1
928928
%a = add i32 %notx, %y

0 commit comments

Comments
 (0)