@@ -57,32 +57,30 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
57
57
; GISEL-LABEL: combine_vec_udiv_nonuniform:
58
58
; GISEL: // %bb.0:
59
59
; GISEL-NEXT: adrp x8, .LCPI1_4
60
- ; GISEL-NEXT: adrp x9, .LCPI1_0
60
+ ; GISEL-NEXT: adrp x9, .LCPI1_5
61
61
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4]
62
62
; GISEL-NEXT: adrp x8, .LCPI1_3
63
- ; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0]
64
63
; GISEL-NEXT: neg v1.8h, v1.8h
65
64
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
66
65
; GISEL-NEXT: adrp x8, .LCPI1_2
67
66
; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
68
67
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
69
68
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
70
69
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
71
- ; GISEL-NEXT: adrp x8, .LCPI1_5
70
+ ; GISEL-NEXT: adrp x8, .LCPI1_1
72
71
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
73
72
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
74
73
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
75
74
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
76
- ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5 ]
77
- ; GISEL-NEXT: adrp x8, .LCPI1_1
78
- ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5 .8h
75
+ ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1 ]
76
+ ; GISEL-NEXT: adrp x8, .LCPI1_0
77
+ ; GISEL-NEXT: neg v3.8h, v3.8h
79
78
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
80
- ; GISEL-NEXT: ldr q4, [x8 , :lo12:.LCPI1_1 ]
81
- ; GISEL-NEXT: shl v3.8h, v3.8h, #15
79
+ ; GISEL-NEXT: ldr q4, [x9 , :lo12:.LCPI1_5 ]
80
+ ; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
82
81
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
83
- ; GISEL-NEXT: neg v2.8h, v4.8h
84
- ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
85
- ; GISEL-NEXT: sshr v2.8h, v3.8h, #15
82
+ ; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
83
+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
86
84
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
87
85
; GISEL-NEXT: ret
88
86
%1 = udiv <8 x i16 > %x , <i16 23 , i16 34 , i16 -23 , i16 56 , i16 128 , i16 -1 , i16 -256 , i16 -32768 >
@@ -108,25 +106,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
108
106
; GISEL-LABEL: combine_vec_udiv_nonuniform2:
109
107
; GISEL: // %bb.0:
110
108
; GISEL-NEXT: adrp x8, .LCPI2_3
111
- ; GISEL-NEXT: adrp x9, .LCPI2_4
112
- ; GISEL-NEXT: adrp x10, .LCPI2_0
109
+ ; GISEL-NEXT: adrp x9, .LCPI2_1
113
110
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3]
114
111
; GISEL-NEXT: adrp x8, .LCPI2_2
115
- ; GISEL-NEXT: ldr q4, [x10 , :lo12:.LCPI2_0 ]
112
+ ; GISEL-NEXT: ldr q4, [x9 , :lo12:.LCPI2_1 ]
116
113
; GISEL-NEXT: neg v1.8h, v1.8h
117
114
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
118
- ; GISEL-NEXT: adrp x8, .LCPI2_1
115
+ ; GISEL-NEXT: adrp x8, .LCPI2_4
119
116
; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
117
+ ; GISEL-NEXT: neg v4.8h, v4.8h
120
118
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
121
- ; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1]
122
119
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
123
- ; GISEL-NEXT: ldr q2, [x9 , :lo12:.LCPI2_4]
124
- ; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h
120
+ ; GISEL-NEXT: ldr q2, [x8 , :lo12:.LCPI2_4]
121
+ ; GISEL-NEXT: adrp x8, .LCPI2_0
125
122
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
126
- ; GISEL-NEXT: neg v3.8h, v5.8h
127
- ; GISEL-NEXT: shl v2.8h, v2.8h, #15
128
- ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
129
- ; GISEL-NEXT: sshr v2.8h, v2.8h, #15
123
+ ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
124
+ ; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
125
+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h
130
126
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
131
127
; GISEL-NEXT: ret
132
128
%1 = udiv <8 x i16 > %x , <i16 -34 , i16 35 , i16 36 , i16 -37 , i16 38 , i16 -39 , i16 40 , i16 -41 >
@@ -151,23 +147,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
151
147
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
152
148
; GISEL: // %bb.0:
153
149
; GISEL-NEXT: adrp x8, .LCPI3_2
154
- ; GISEL-NEXT: adrp x9, .LCPI3_0
150
+ ; GISEL-NEXT: adrp x9, .LCPI3_3
155
151
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
156
- ; GISEL-NEXT: adrp x8, .LCPI3_3
157
- ; GISEL-NEXT: ldr q3 , [x9, :lo12:.LCPI3_0 ]
152
+ ; GISEL-NEXT: adrp x8, .LCPI3_1
153
+ ; GISEL-NEXT: ldr q4 , [x9, :lo12:.LCPI3_3 ]
158
154
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
159
155
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
160
156
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
161
- ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
162
- ; GISEL-NEXT: adrp x8, .LCPI3_1
163
- ; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
164
- ; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h
165
- ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
166
- ; GISEL-NEXT: shl v2.8h, v2.8h, #15
167
- ; GISEL-NEXT: usra v1.8h, v4.8h, #1
168
- ; GISEL-NEXT: neg v3.8h, v3.8h
169
- ; GISEL-NEXT: sshr v2.8h, v2.8h, #15
170
- ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
157
+ ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
158
+ ; GISEL-NEXT: adrp x8, .LCPI3_0
159
+ ; GISEL-NEXT: neg v2.8h, v2.8h
160
+ ; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
161
+ ; GISEL-NEXT: usra v1.8h, v3.8h, #1
162
+ ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0]
163
+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
164
+ ; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
171
165
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
172
166
; GISEL-NEXT: ret
173
167
%1 = udiv <8 x i16 > %x , <i16 7 , i16 23 , i16 25 , i16 27 , i16 31 , i16 47 , i16 63 , i16 127 >
@@ -197,21 +191,19 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
197
191
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
198
192
; GISEL: // %bb.0:
199
193
; GISEL-NEXT: adrp x8, .LCPI4_2
200
- ; GISEL-NEXT: adrp x9, .LCPI4_0
194
+ ; GISEL-NEXT: adrp x9, .LCPI4_1
201
195
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
202
196
; GISEL-NEXT: adrp x8, .LCPI4_3
203
- ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0 ]
197
+ ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_1 ]
204
198
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
205
199
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
206
200
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
207
- ; GISEL-NEXT: adrp x8, .LCPI4_1
208
- ; GISEL-NEXT: cmeq v3.16b, v3 .16b, v4.16b
201
+ ; GISEL-NEXT: adrp x8, .LCPI4_0
202
+ ; GISEL-NEXT: neg v4 .16b, v4.16b
209
203
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
210
- ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
211
- ; GISEL-NEXT: shl v3.16b, v3.16b, #7
212
- ; GISEL-NEXT: neg v2.16b, v2.16b
213
- ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
214
- ; GISEL-NEXT: sshr v2.16b, v3.16b, #7
204
+ ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
205
+ ; GISEL-NEXT: cmeq v2.16b, v3.16b, v2.16b
206
+ ; GISEL-NEXT: ushl v1.16b, v1.16b, v4.16b
215
207
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
216
208
; GISEL-NEXT: ret
217
209
%div = udiv <16 x i8 > %x , <i8 -64 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 >
@@ -248,28 +240,26 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
248
240
; GISEL-LABEL: pr38477:
249
241
; GISEL: // %bb.0:
250
242
; GISEL-NEXT: adrp x8, .LCPI5_3
251
- ; GISEL-NEXT: adrp x9, .LCPI5_0
243
+ ; GISEL-NEXT: adrp x9, .LCPI5_4
252
244
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
253
245
; GISEL-NEXT: adrp x8, .LCPI5_2
254
- ; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0]
255
246
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
256
247
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
257
248
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
258
249
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
259
- ; GISEL-NEXT: adrp x8, .LCPI5_4
250
+ ; GISEL-NEXT: adrp x8, .LCPI5_1
260
251
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
261
252
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
262
253
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
263
- ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4 ]
264
- ; GISEL-NEXT: adrp x8, .LCPI5_1
265
- ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5 .8h
254
+ ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1 ]
255
+ ; GISEL-NEXT: adrp x8, .LCPI5_0
256
+ ; GISEL-NEXT: neg v3.8h, v3.8h
266
257
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
267
- ; GISEL-NEXT: ldr q4, [x8 , :lo12:.LCPI5_1 ]
268
- ; GISEL-NEXT: shl v3.8h, v3.8h, #15
258
+ ; GISEL-NEXT: ldr q4, [x9 , :lo12:.LCPI5_4 ]
259
+ ; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
269
260
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
270
- ; GISEL-NEXT: neg v2.8h, v4.8h
271
- ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
272
- ; GISEL-NEXT: sshr v2.8h, v3.8h, #15
261
+ ; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
262
+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
273
263
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
274
264
; GISEL-NEXT: ret
275
265
%1 = udiv <8 x i16 > %a0 , <i16 1 , i16 119 , i16 73 , i16 -111 , i16 -3 , i16 118 , i16 32 , i16 31 >
0 commit comments