clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/X86 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include 
-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/X86/X86ISelLowering.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | #include "X86ISelLowering.h" |
15 | #include "MCTargetDesc/X86ShuffleDecode.h" |
16 | #include "X86.h" |
17 | #include "X86CallingConv.h" |
18 | #include "X86FrameLowering.h" |
19 | #include "X86InstrBuilder.h" |
20 | #include "X86IntrinsicsInfo.h" |
21 | #include "X86MachineFunctionInfo.h" |
22 | #include "X86TargetMachine.h" |
23 | #include "X86TargetObjectFile.h" |
24 | #include "llvm/ADT/SmallBitVector.h" |
25 | #include "llvm/ADT/SmallSet.h" |
26 | #include "llvm/ADT/Statistic.h" |
27 | #include "llvm/ADT/StringExtras.h" |
28 | #include "llvm/ADT/StringSwitch.h" |
29 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
30 | #include "llvm/Analysis/EHPersonalities.h" |
31 | #include "llvm/Analysis/ObjCARCUtil.h" |
32 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
33 | #include "llvm/Analysis/VectorUtils.h" |
34 | #include "llvm/CodeGen/IntrinsicLowering.h" |
35 | #include "llvm/CodeGen/MachineFrameInfo.h" |
36 | #include "llvm/CodeGen/MachineFunction.h" |
37 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
38 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
39 | #include "llvm/CodeGen/MachineLoopInfo.h" |
40 | #include "llvm/CodeGen/MachineModuleInfo.h" |
41 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
42 | #include "llvm/CodeGen/TargetLowering.h" |
43 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
44 | #include "llvm/IR/CallingConv.h" |
45 | #include "llvm/IR/Constants.h" |
46 | #include "llvm/IR/DerivedTypes.h" |
47 | #include "llvm/IR/DiagnosticInfo.h" |
48 | #include "llvm/IR/Function.h" |
49 | #include "llvm/IR/GlobalAlias.h" |
50 | #include "llvm/IR/GlobalVariable.h" |
51 | #include "llvm/IR/IRBuilder.h" |
52 | #include "llvm/IR/Instructions.h" |
53 | #include "llvm/IR/Intrinsics.h" |
54 | #include "llvm/IR/PatternMatch.h" |
55 | #include "llvm/MC/MCAsmInfo.h" |
56 | #include "llvm/MC/MCContext.h" |
57 | #include "llvm/MC/MCExpr.h" |
58 | #include "llvm/MC/MCSymbol.h" |
59 | #include "llvm/Support/CommandLine.h" |
60 | #include "llvm/Support/Debug.h" |
61 | #include "llvm/Support/ErrorHandling.h" |
62 | #include "llvm/Support/KnownBits.h" |
63 | #include "llvm/Support/MathExtras.h" |
64 | #include "llvm/Target/TargetOptions.h" |
65 | #include <algorithm> |
66 | #include <bitset> |
67 | #include <cctype> |
68 | #include <numeric> |
69 | using namespace llvm; |
70 | |
71 | #define DEBUG_TYPE "x86-isel" |
72 | |
73 | STATISTIC(NumTailCalls, "Number of tail calls"); |
74 | |
// -x86-experimental-pref-innermost-loop-alignment (default 4): preferred
// alignment, as log2 bytes, applied to innermost loops only. When set it
// overrides the alignment chosen via x86-experimental-pref-loop-alignment
// (per the cl::desc text below). cl::Hidden keeps it out of -help.
75 | static cl::opt<int> ExperimentalPrefInnermostLoopAlignment( |
76 | "x86-experimental-pref-innermost-loop-alignment", cl::init(4), |
77 | cl::desc( |
78 | "Sets the preferable loop alignment for experiments (as log2 bytes) " |
79 | "for innermost loops only. If specified, this option overrides " |
80 | "alignment set by x86-experimental-pref-loop-alignment."), |
81 | cl::Hidden); |
82 | |
// -mul-constant-optimization (default true): permit replacing
// 'mul x, Const' with cheaper instruction sequences (SHIFT, LEA, ...)
// as described in the cl::desc string.
83 | static cl::opt<bool> MulConstantOptimization( |
84 | "mul-constant-optimization", cl::init(true), |
85 | cl::desc("Replace 'mul x, Const' with more effective instructions like " |
86 | "SHIFT, LEA, etc."), |
87 | cl::Hidden); |
88 | |
// -x86-experimental-unordered-atomic-isel (default false): select unordered
// atomic loads/stores through LoadSDNode/StoreSDNode rather than
// AtomicSDNode, per the cl::desc text.
89 | static cl::opt<bool> ExperimentalUnorderedISEL( |
90 | "x86-experimental-unordered-atomic-isel", cl::init(false), |
91 | cl::desc("Use LoadSDNode and StoreSDNode instead of " |
92 | "AtomicSDNode for unordered atomic loads and " |
93 | "stores respectively."), |
94 | cl::Hidden); |
95 | |
96 | |
97 | |
98 | |
99 | |
100 | static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, |
101 | const char *Msg) { |
102 | MachineFunction &MF = DAG.getMachineFunction(); |
103 | DAG.getContext()->diagnose( |
104 | DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); |
105 | } |
106 | |
107 | X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, |
108 | const X86Subtarget &STI) |
109 | : TargetLowering(TM), Subtarget(STI) { |
110 | bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); |
111 | X86ScalarSSEf64 = Subtarget.hasSSE2(); |
112 | X86ScalarSSEf32 = Subtarget.hasSSE1(); |
113 | X86ScalarSSEf16 = Subtarget.hasFP16(); |
114 | MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); |
115 | |
116 | |
117 | |
118 | |
119 | setBooleanContents(ZeroOrOneBooleanContent); |
120 | |
121 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
122 | |
123 | |
124 | |
125 | |
126 | if (Subtarget.isAtom()) |
127 | setSchedulingPreference(Sched::ILP); |
128 | else if (Subtarget.is64Bit()) |
129 | setSchedulingPreference(Sched::ILP); |
130 | else |
131 | setSchedulingPreference(Sched::RegPressure); |
132 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
133 | setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); |
134 | |
135 | |
136 | if (TM.getOptLevel() >= CodeGenOpt::Default) { |
137 | if (Subtarget.hasSlowDivide32()) |
138 | addBypassSlowDiv(32, 8); |
139 | if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit()) |
140 | addBypassSlowDiv(64, 32); |
141 | } |
142 | |
143 | |
144 | if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) { |
145 | static const struct { |
146 | const RTLIB::Libcall Op; |
147 | const char * const Name; |
148 | const CallingConv::ID CC; |
149 | } LibraryCalls[] = { |
150 | { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall }, |
151 | { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall }, |
152 | { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall }, |
153 | { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall }, |
154 | { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall }, |
155 | }; |
156 | |
157 | for (const auto &LC : LibraryCalls) { |
158 | setLibcallName(LC.Op, LC.Name); |
159 | setLibcallCallingConv(LC.Op, LC.CC); |
160 | } |
161 | } |
162 | |
163 | if (Subtarget.getTargetTriple().isOSMSVCRT()) { |
164 | |
165 | setLibcallName(RTLIB::POWI_F32, nullptr); |
166 | setLibcallName(RTLIB::POWI_F64, nullptr); |
167 | } |
168 | |
169 | |
170 | |
171 | |
172 | |
173 | if (!Subtarget.hasCmpxchg8b()) |
174 | setMaxAtomicSizeInBitsSupported(32); |
175 | |
176 | |
177 | addRegisterClass(MVT::i8, &X86::GR8RegClass); |
178 | addRegisterClass(MVT::i16, &X86::GR16RegClass); |
179 | addRegisterClass(MVT::i32, &X86::GR32RegClass); |
180 | if (Subtarget.is64Bit()) |
181 | addRegisterClass(MVT::i64, &X86::GR64RegClass); |
182 | |
183 | for (MVT VT : MVT::integer_valuetypes()) |
184 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
185 | |
186 | |
187 | setTruncStoreAction(MVT::i64, MVT::i32, Expand); |
188 | setTruncStoreAction(MVT::i64, MVT::i16, Expand); |
189 | setTruncStoreAction(MVT::i64, MVT::i8 , Expand); |
190 | setTruncStoreAction(MVT::i32, MVT::i16, Expand); |
191 | setTruncStoreAction(MVT::i32, MVT::i8 , Expand); |
192 | setTruncStoreAction(MVT::i16, MVT::i8, Expand); |
193 | |
194 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
195 | |
196 | |
197 | for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { |
198 | setCondCodeAction(ISD::SETOEQ, VT, Expand); |
199 | setCondCodeAction(ISD::SETUNE, VT, Expand); |
200 | } |
201 | |
202 | |
203 | if (Subtarget.hasCMov()) { |
204 | setOperationAction(ISD::ABS , MVT::i16 , Custom); |
205 | setOperationAction(ISD::ABS , MVT::i32 , Custom); |
206 | if (Subtarget.is64Bit()) |
207 | setOperationAction(ISD::ABS , MVT::i64 , Custom); |
208 | } |
209 | |
210 | |
211 | setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom); |
212 | setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom); |
213 | setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom); |
214 | if (Subtarget.is64Bit()) |
215 | setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom); |
216 | |
217 | |
218 | for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { |
219 | |
220 | LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal; |
221 | |
222 | setOperationAction(ShiftOp , MVT::i8 , Custom); |
223 | setOperationAction(ShiftOp , MVT::i16 , Custom); |
224 | setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction); |
225 | if (Subtarget.is64Bit()) |
226 | setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction); |
227 | } |
228 | |
229 | if (!Subtarget.useSoftFloat()) { |
230 | |
231 | |
232 | setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); |
233 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote); |
234 | setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); |
235 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote); |
236 | |
237 | |
238 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
239 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); |
240 | |
241 | |
242 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
243 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); |
244 | |
245 | |
246 | |
247 | setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); |
248 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote); |
249 | |
250 | |
251 | setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); |
252 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom); |
253 | |
254 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
255 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
256 | |
257 | |
258 | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
259 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
260 | |
261 | |
262 | |
263 | setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); |
264 | |
265 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote); |
266 | setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom); |
267 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom); |
268 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
269 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
270 | |
271 | |
272 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
273 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
274 | |
275 | |
276 | |
277 | setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); |
278 | |
279 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote); |
280 | setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); |
281 | |
282 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote); |
283 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
284 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
285 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
286 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); |
287 | |
288 | setOperationAction(ISD::LRINT, MVT::f32, Custom); |
289 | setOperationAction(ISD::LRINT, MVT::f64, Custom); |
290 | setOperationAction(ISD::LLRINT, MVT::f32, Custom); |
291 | setOperationAction(ISD::LLRINT, MVT::f64, Custom); |
292 | |
293 | if (!Subtarget.is64Bit()) { |
294 | setOperationAction(ISD::LRINT, MVT::i64, Custom); |
295 | setOperationAction(ISD::LLRINT, MVT::i64, Custom); |
296 | } |
297 | } |
298 | |
299 | if (Subtarget.hasSSE2()) { |
300 | |
301 | |
302 | for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) { |
303 | setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); |
304 | setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); |
305 | } |
306 | if (Subtarget.is64Bit()) { |
307 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); |
308 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); |
309 | } |
310 | } |
311 | |
312 | |
313 | setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); |
314 | setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); |
315 | |
316 | |
317 | if (!X86ScalarSSEf64) { |
318 | setOperationAction(ISD::BITCAST , MVT::f32 , Expand); |
319 | setOperationAction(ISD::BITCAST , MVT::i32 , Expand); |
320 | if (Subtarget.is64Bit()) { |
321 | setOperationAction(ISD::BITCAST , MVT::f64 , Expand); |
322 | |
323 | setOperationAction(ISD::BITCAST , MVT::i64 , Expand); |
324 | } |
325 | } else if (!Subtarget.is64Bit()) |
326 | setOperationAction(ISD::BITCAST , MVT::i64 , Custom); |
327 | |
328 | |
329 | |
330 | |
331 | |
332 | |
333 | |
334 | |
335 | |
336 | |
337 | |
338 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
339 | setOperationAction(ISD::MULHS, VT, Expand); |
340 | setOperationAction(ISD::MULHU, VT, Expand); |
341 | setOperationAction(ISD::SDIV, VT, Expand); |
342 | setOperationAction(ISD::UDIV, VT, Expand); |
343 | setOperationAction(ISD::SREM, VT, Expand); |
344 | setOperationAction(ISD::UREM, VT, Expand); |
345 | } |
346 | |
347 | setOperationAction(ISD::BR_JT , MVT::Other, Expand); |
348 | setOperationAction(ISD::BRCOND , MVT::Other, Custom); |
349 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, |
350 | MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
351 | setOperationAction(ISD::BR_CC, VT, Expand); |
352 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
353 | } |
354 | if (Subtarget.is64Bit()) |
355 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
356 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); |
357 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); |
358 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); |
359 | |
360 | setOperationAction(ISD::FREM , MVT::f32 , Expand); |
361 | setOperationAction(ISD::FREM , MVT::f64 , Expand); |
362 | setOperationAction(ISD::FREM , MVT::f80 , Expand); |
363 | setOperationAction(ISD::FREM , MVT::f128 , Expand); |
364 | |
365 | if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) { |
366 | setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); |
367 | setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom); |
368 | } |
369 | |
370 | |
371 | |
372 | setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); |
373 | setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
374 | |
375 | if (Subtarget.hasBMI()) { |
376 | |
377 | |
378 | setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32); |
379 | } else { |
380 | setOperationAction(ISD::CTTZ, MVT::i16, Custom); |
381 | setOperationAction(ISD::CTTZ , MVT::i32 , Custom); |
382 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); |
383 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); |
384 | if (Subtarget.is64Bit()) { |
385 | setOperationAction(ISD::CTTZ , MVT::i64 , Custom); |
386 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); |
387 | } |
388 | } |
389 | |
390 | if (Subtarget.hasLZCNT()) { |
391 | |
392 | |
393 | setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32); |
394 | setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
395 | } else { |
396 | for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { |
397 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
398 | continue; |
399 | setOperationAction(ISD::CTLZ , VT, Custom); |
400 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); |
401 | } |
402 | } |
403 | |
404 | for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16, |
405 | ISD::STRICT_FP_TO_FP16}) { |
406 | |
407 | |
408 | |
409 | setOperationAction( |
410 | Op, MVT::f32, |
411 | (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand); |
412 | |
413 | setOperationAction(Op, MVT::f64, Expand); |
414 | setOperationAction(Op, MVT::f80, Expand); |
415 | setOperationAction(Op, MVT::f128, Expand); |
416 | } |
417 | |
418 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
419 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
420 | setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); |
421 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); |
422 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
423 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
424 | setTruncStoreAction(MVT::f80, MVT::f16, Expand); |
425 | setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
426 | |
427 | setOperationAction(ISD::PARITY, MVT::i8, Custom); |
428 | if (Subtarget.hasPOPCNT()) { |
429 | setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); |
430 | } else { |
431 | setOperationAction(ISD::CTPOP , MVT::i8 , Expand); |
432 | setOperationAction(ISD::CTPOP , MVT::i16 , Expand); |
433 | setOperationAction(ISD::CTPOP , MVT::i32 , Expand); |
434 | if (Subtarget.is64Bit()) |
435 | setOperationAction(ISD::CTPOP , MVT::i64 , Expand); |
436 | else |
437 | setOperationAction(ISD::CTPOP , MVT::i64 , Custom); |
438 | |
439 | setOperationAction(ISD::PARITY, MVT::i16, Custom); |
440 | setOperationAction(ISD::PARITY, MVT::i32, Custom); |
441 | if (Subtarget.is64Bit()) |
442 | setOperationAction(ISD::PARITY, MVT::i64, Custom); |
443 | } |
444 | |
445 | setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); |
446 | |
447 | if (!Subtarget.hasMOVBE()) |
448 | setOperationAction(ISD::BSWAP , MVT::i16 , Expand); |
449 | |
450 | |
451 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { |
452 | setOperationAction(ISD::SELECT, VT, Custom); |
453 | setOperationAction(ISD::SETCC, VT, Custom); |
454 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
455 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
456 | } |
457 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
458 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
459 | continue; |
460 | setOperationAction(ISD::SELECT, VT, Custom); |
461 | setOperationAction(ISD::SETCC, VT, Custom); |
462 | } |
463 | |
464 | |
465 | setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); |
466 | setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); |
467 | |
468 | setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); |
469 | |
470 | |
471 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
472 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
473 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
474 | if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) |
475 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
476 | |
477 | |
478 | for (auto VT : { MVT::i32, MVT::i64 }) { |
479 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
480 | continue; |
481 | setOperationAction(ISD::ConstantPool , VT, Custom); |
482 | setOperationAction(ISD::JumpTable , VT, Custom); |
483 | setOperationAction(ISD::GlobalAddress , VT, Custom); |
484 | setOperationAction(ISD::GlobalTLSAddress, VT, Custom); |
485 | setOperationAction(ISD::ExternalSymbol , VT, Custom); |
486 | setOperationAction(ISD::BlockAddress , VT, Custom); |
487 | } |
488 | |
489 | |
490 | for (auto VT : { MVT::i32, MVT::i64 }) { |
491 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
492 | continue; |
493 | setOperationAction(ISD::SHL_PARTS, VT, Custom); |
494 | setOperationAction(ISD::SRA_PARTS, VT, Custom); |
495 | setOperationAction(ISD::SRL_PARTS, VT, Custom); |
496 | } |
497 | |
498 | if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) |
499 | setOperationAction(ISD::PREFETCH , MVT::Other, Legal); |
500 | |
501 | setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); |
502 | |
503 | |
504 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
505 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); |
506 | setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); |
507 | setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); |
508 | setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom); |
509 | setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom); |
510 | setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom); |
511 | setOperationAction(ISD::ATOMIC_STORE, VT, Custom); |
512 | } |
513 | |
514 | if (!Subtarget.is64Bit()) |
515 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); |
516 | |
517 | if (Subtarget.hasCmpxchg16b()) { |
518 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); |
519 | } |
520 | |
521 | |
522 | if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && |
523 | !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() && |
524 | TM.Options.ExceptionModel != ExceptionHandling::SjLj) { |
525 | setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); |
526 | } |
527 | |
528 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); |
529 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); |
530 | |
531 | setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
532 | setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
533 | |
534 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
535 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
536 | setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); |
537 | |
538 | |
539 | setOperationAction(ISD::VASTART , MVT::Other, Custom); |
540 | setOperationAction(ISD::VAEND , MVT::Other, Expand); |
541 | bool Is64Bit = Subtarget.is64Bit(); |
542 | setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); |
543 | setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand); |
544 | |
545 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
546 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
547 | |
548 | setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); |
549 | |
550 | |
551 | setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); |
552 | setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); |
553 | |
554 | if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { |
555 | |
556 | |
557 | addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass |
558 | : &X86::FR32RegClass); |
559 | addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass |
560 | : &X86::FR64RegClass); |
561 | |
562 | |
563 | |
564 | |
565 | |
566 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
567 | |
568 | for (auto VT : { MVT::f32, MVT::f64 }) { |
569 | |
570 | setOperationAction(ISD::FABS, VT, Custom); |
571 | |
572 | |
573 | setOperationAction(ISD::FNEG, VT, Custom); |
574 | |
575 | |
576 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
577 | |
578 | |
579 | setOperationAction(ISD::FADD, VT, Custom); |
580 | setOperationAction(ISD::FSUB, VT, Custom); |
581 | |
582 | |
583 | setOperationAction(ISD::FSIN , VT, Expand); |
584 | setOperationAction(ISD::FCOS , VT, Expand); |
585 | setOperationAction(ISD::FSINCOS, VT, Expand); |
586 | } |
587 | |
588 | |
589 | setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); |
590 | setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); |
591 | |
592 | } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 && |
593 | (UseX87 || Is64Bit)) { |
594 | |
595 | |
596 | addRegisterClass(MVT::f32, &X86::FR32RegClass); |
597 | if (UseX87) |
598 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
599 | |
600 | |
601 | setOperationAction(ISD::FABS , MVT::f32, Custom); |
602 | |
603 | |
604 | setOperationAction(ISD::FNEG , MVT::f32, Custom); |
605 | |
606 | if (UseX87) |
607 | setOperationAction(ISD::UNDEF, MVT::f64, Expand); |
608 | |
609 | |
610 | if (UseX87) |
611 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
612 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
613 | |
614 | |
615 | setOperationAction(ISD::FSIN , MVT::f32, Expand); |
616 | setOperationAction(ISD::FCOS , MVT::f32, Expand); |
617 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
618 | |
619 | if (UseX87) { |
620 | |
621 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
622 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
623 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
624 | } |
625 | } else if (UseX87) { |
626 | |
627 | |
628 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
629 | addRegisterClass(MVT::f32, &X86::RFP32RegClass); |
630 | |
631 | for (auto VT : { MVT::f32, MVT::f64 }) { |
632 | setOperationAction(ISD::UNDEF, VT, Expand); |
633 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
634 | |
635 | |
636 | setOperationAction(ISD::FSIN , VT, Expand); |
637 | setOperationAction(ISD::FCOS , VT, Expand); |
638 | setOperationAction(ISD::FSINCOS, VT, Expand); |
639 | } |
640 | } |
641 | |
642 | |
643 | if (isTypeLegal(MVT::f32)) { |
644 | if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) { |
645 | addLegalFPImmediate(APFloat(+0.0f)); |
646 | addLegalFPImmediate(APFloat(+1.0f)); |
647 | addLegalFPImmediate(APFloat(-0.0f)); |
648 | addLegalFPImmediate(APFloat(-1.0f)); |
649 | } else |
650 | addLegalFPImmediate(APFloat(+0.0f)); |
651 | } |
652 | |
653 | if (isTypeLegal(MVT::f64)) { |
654 | if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) { |
655 | addLegalFPImmediate(APFloat(+0.0)); |
656 | addLegalFPImmediate(APFloat(+1.0)); |
657 | addLegalFPImmediate(APFloat(-0.0)); |
658 | addLegalFPImmediate(APFloat(-1.0)); |
659 | } else |
660 | addLegalFPImmediate(APFloat(+0.0)); |
661 | } |
662 | |
663 | setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); |
664 | setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); |
665 | setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); |
666 | setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); |
667 | setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); |
668 | setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); |
669 | setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); |
670 | setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); |
671 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); |
672 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); |
673 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); |
674 | setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); |
675 | setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); |
676 | |
677 | |
678 | setOperationAction(ISD::FMA, MVT::f64, Expand); |
679 | setOperationAction(ISD::FMA, MVT::f32, Expand); |
680 | |
681 | |
682 | if (UseX87) { |
683 | addRegisterClass(MVT::f80, &X86::RFP80RegClass); |
684 | setOperationAction(ISD::UNDEF, MVT::f80, Expand); |
685 | setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); |
686 | { |
687 | APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended()); |
688 | addLegalFPImmediate(TmpFlt); |
689 | TmpFlt.changeSign(); |
690 | addLegalFPImmediate(TmpFlt); |
691 | |
692 | bool ignored; |
693 | APFloat TmpFlt2(+1.0); |
694 | TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
695 | &ignored); |
696 | addLegalFPImmediate(TmpFlt2); |
697 | TmpFlt2.changeSign(); |
698 | addLegalFPImmediate(TmpFlt2); |
699 | } |
700 | |
701 | |
702 | setOperationAction(ISD::FSIN , MVT::f80, Expand); |
703 | setOperationAction(ISD::FCOS , MVT::f80, Expand); |
704 | setOperationAction(ISD::FSINCOS, MVT::f80, Expand); |
705 | |
706 | setOperationAction(ISD::FFLOOR, MVT::f80, Expand); |
707 | setOperationAction(ISD::FCEIL, MVT::f80, Expand); |
708 | setOperationAction(ISD::FTRUNC, MVT::f80, Expand); |
709 | setOperationAction(ISD::FRINT, MVT::f80, Expand); |
710 | setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); |
711 | setOperationAction(ISD::FMA, MVT::f80, Expand); |
712 | setOperationAction(ISD::LROUND, MVT::f80, Expand); |
713 | setOperationAction(ISD::LLROUND, MVT::f80, Expand); |
714 | setOperationAction(ISD::LRINT, MVT::f80, Custom); |
715 | setOperationAction(ISD::LLRINT, MVT::f80, Custom); |
716 | |
717 | |
718 | setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal); |
719 | setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal); |
720 | setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal); |
721 | setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal); |
722 | setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal); |
723 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal); |
724 | |
725 | |
726 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal); |
727 | } |
728 | |
729 | |
730 | if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) { |
731 | addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass |
732 | : &X86::VR128RegClass); |
733 | |
734 | addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); |
735 | |
736 | setOperationAction(ISD::FADD, MVT::f128, LibCall); |
737 | setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall); |
738 | setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
739 | setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall); |
740 | setOperationAction(ISD::FDIV, MVT::f128, LibCall); |
741 | setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall); |
742 | setOperationAction(ISD::FMUL, MVT::f128, LibCall); |
743 | setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall); |
744 | setOperationAction(ISD::FMA, MVT::f128, LibCall); |
745 | setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall); |
746 | |
747 | setOperationAction(ISD::FABS, MVT::f128, Custom); |
748 | setOperationAction(ISD::FNEG, MVT::f128, Custom); |
749 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); |
750 | |
751 | setOperationAction(ISD::FSIN, MVT::f128, LibCall); |
752 | setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall); |
753 | setOperationAction(ISD::FCOS, MVT::f128, LibCall); |
754 | setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall); |
755 | setOperationAction(ISD::FSINCOS, MVT::f128, LibCall); |
756 | |
757 | setOperationAction(ISD::FSQRT, MVT::f128, LibCall); |
758 | setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall); |
759 | |
760 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
761 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom); |
762 | |
763 | |
764 | |
765 | if (isTypeLegal(MVT::f32)) { |
766 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
767 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
768 | } |
769 | if (isTypeLegal(MVT::f64)) { |
770 | setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
771 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
772 | } |
773 | if (isTypeLegal(MVT::f80)) { |
774 | setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); |
775 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); |
776 | } |
777 | |
778 | setOperationAction(ISD::SETCC, MVT::f128, Custom); |
779 | |
780 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); |
781 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); |
782 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand); |
783 | setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
784 | setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
785 | setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
786 | } |
787 | |
788 | |
789 | setOperationAction(ISD::FPOW , MVT::f32 , Expand); |
790 | setOperationAction(ISD::FPOW , MVT::f64 , Expand); |
791 | setOperationAction(ISD::FPOW , MVT::f80 , Expand); |
792 | setOperationAction(ISD::FPOW , MVT::f128 , Expand); |
793 | |
794 | setOperationAction(ISD::FLOG, MVT::f80, Expand); |
795 | setOperationAction(ISD::FLOG2, MVT::f80, Expand); |
796 | setOperationAction(ISD::FLOG10, MVT::f80, Expand); |
797 | setOperationAction(ISD::FEXP, MVT::f80, Expand); |
798 | setOperationAction(ISD::FEXP2, MVT::f80, Expand); |
799 | setOperationAction(ISD::FMINNUM, MVT::f80, Expand); |
800 | setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); |
801 | |
802 | |
803 | for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, |
804 | MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { |
805 | setOperationAction(ISD::FSIN, VT, Expand); |
806 | setOperationAction(ISD::FSINCOS, VT, Expand); |
807 | setOperationAction(ISD::FCOS, VT, Expand); |
808 | setOperationAction(ISD::FREM, VT, Expand); |
809 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
810 | setOperationAction(ISD::FPOW, VT, Expand); |
811 | setOperationAction(ISD::FLOG, VT, Expand); |
812 | setOperationAction(ISD::FLOG2, VT, Expand); |
813 | setOperationAction(ISD::FLOG10, VT, Expand); |
814 | setOperationAction(ISD::FEXP, VT, Expand); |
815 | setOperationAction(ISD::FEXP2, VT, Expand); |
816 | } |
817 | |
818 | |
819 | |
820 | |
821 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
822 | setOperationAction(ISD::SDIV, VT, Expand); |
823 | setOperationAction(ISD::UDIV, VT, Expand); |
824 | setOperationAction(ISD::SREM, VT, Expand); |
825 | setOperationAction(ISD::UREM, VT, Expand); |
826 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); |
827 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
828 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); |
829 | setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); |
830 | setOperationAction(ISD::FMA, VT, Expand); |
831 | setOperationAction(ISD::FFLOOR, VT, Expand); |
832 | setOperationAction(ISD::FCEIL, VT, Expand); |
833 | setOperationAction(ISD::FTRUNC, VT, Expand); |
834 | setOperationAction(ISD::FRINT, VT, Expand); |
835 | setOperationAction(ISD::FNEARBYINT, VT, Expand); |
836 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
837 | setOperationAction(ISD::MULHS, VT, Expand); |
838 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
839 | setOperationAction(ISD::MULHU, VT, Expand); |
840 | setOperationAction(ISD::SDIVREM, VT, Expand); |
841 | setOperationAction(ISD::UDIVREM, VT, Expand); |
842 | setOperationAction(ISD::CTPOP, VT, Expand); |
843 | setOperationAction(ISD::CTTZ, VT, Expand); |
844 | setOperationAction(ISD::CTLZ, VT, Expand); |
845 | setOperationAction(ISD::ROTL, VT, Expand); |
846 | setOperationAction(ISD::ROTR, VT, Expand); |
847 | setOperationAction(ISD::BSWAP, VT, Expand); |
848 | setOperationAction(ISD::SETCC, VT, Expand); |
849 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
850 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
851 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
852 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
853 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); |
854 | setOperationAction(ISD::TRUNCATE, VT, Expand); |
855 | setOperationAction(ISD::SIGN_EXTEND, VT, Expand); |
856 | setOperationAction(ISD::ZERO_EXTEND, VT, Expand); |
857 | setOperationAction(ISD::ANY_EXTEND, VT, Expand); |
858 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
859 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
860 | setTruncStoreAction(InnerVT, VT, Expand); |
861 | |
862 | setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); |
863 | setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand); |
864 | |
865 | |
866 | |
867 | |
868 | |
869 | if (VT.getVectorElementType() == MVT::i1) |
870 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
871 | |
872 | |
873 | |
874 | if (VT.getVectorElementType() == MVT::f16) |
875 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
876 | } |
877 | } |
878 | |
879 | |
880 | |
881 | if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { |
882 | addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); |
883 | |
884 | } |
885 | |
886 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { |
887 | addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
888 | : &X86::VR128RegClass); |
889 | |
890 | setOperationAction(ISD::FNEG, MVT::v4f32, Custom); |
891 | setOperationAction(ISD::FABS, MVT::v4f32, Custom); |
892 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); |
893 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
894 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); |
895 | setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); |
896 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); |
897 | setOperationAction(ISD::SELECT, MVT::v4f32, Custom); |
898 | |
899 | setOperationAction(ISD::LOAD, MVT::v2f32, Custom); |
900 | setOperationAction(ISD::STORE, MVT::v2f32, Custom); |
901 | |
902 | setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); |
903 | setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); |
904 | setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); |
905 | setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); |
906 | setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); |
907 | } |
908 | |
909 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { |
910 | addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
911 | : &X86::VR128RegClass); |
912 | |
913 | |
914 | |
915 | addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass |
916 | : &X86::VR128RegClass); |
917 | addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass |
918 | : &X86::VR128RegClass); |
919 | addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
920 | : &X86::VR128RegClass); |
921 | addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
922 | : &X86::VR128RegClass); |
923 | |
924 | for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, |
925 | MVT::v2i16, MVT::v4i16, MVT::v2i32 }) { |
926 | setOperationAction(ISD::SDIV, VT, Custom); |
927 | setOperationAction(ISD::SREM, VT, Custom); |
928 | setOperationAction(ISD::UDIV, VT, Custom); |
929 | setOperationAction(ISD::UREM, VT, Custom); |
930 | } |
931 | |
932 | setOperationAction(ISD::MUL, MVT::v2i8, Custom); |
933 | setOperationAction(ISD::MUL, MVT::v4i8, Custom); |
934 | setOperationAction(ISD::MUL, MVT::v8i8, Custom); |
935 | |
936 | setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
937 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
938 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
939 | setOperationAction(ISD::MULHU, MVT::v4i32, Custom); |
940 | setOperationAction(ISD::MULHS, MVT::v4i32, Custom); |
941 | setOperationAction(ISD::MULHU, MVT::v16i8, Custom); |
942 | setOperationAction(ISD::MULHS, MVT::v16i8, Custom); |
943 | setOperationAction(ISD::MULHU, MVT::v8i16, Legal); |
944 | setOperationAction(ISD::MULHS, MVT::v8i16, Legal); |
945 | setOperationAction(ISD::MUL, MVT::v8i16, Legal); |
946 | |
947 | setOperationAction(ISD::SMULO, MVT::v16i8, Custom); |
948 | setOperationAction(ISD::UMULO, MVT::v16i8, Custom); |
949 | |
950 | setOperationAction(ISD::FNEG, MVT::v2f64, Custom); |
951 | setOperationAction(ISD::FABS, MVT::v2f64, Custom); |
952 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); |
953 | |
954 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
955 | setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); |
956 | setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); |
957 | setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); |
958 | setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); |
959 | } |
960 | |
961 | setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); |
962 | setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); |
963 | setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); |
964 | setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); |
965 | setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); |
966 | setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); |
967 | setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); |
968 | setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); |
969 | setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); |
970 | setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); |
971 | |
972 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
973 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
974 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
975 | |
976 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
977 | setOperationAction(ISD::SETCC, VT, Custom); |
978 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
979 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
980 | setOperationAction(ISD::CTPOP, VT, Custom); |
981 | setOperationAction(ISD::ABS, VT, Custom); |
982 | |
983 | |
984 | |
985 | setCondCodeAction(ISD::SETLT, VT, Custom); |
986 | setCondCodeAction(ISD::SETLE, VT, Custom); |
987 | } |
988 | |
989 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
990 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
991 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
992 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
993 | setOperationAction(ISD::VSELECT, VT, Custom); |
994 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
995 | } |
996 | |
997 | for (auto VT : { MVT::v2f64, MVT::v2i64 }) { |
998 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
999 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1000 | setOperationAction(ISD::VSELECT, VT, Custom); |
1001 | |
1002 | if (VT == MVT::v2i64 && !Subtarget.is64Bit()) |
1003 | continue; |
1004 | |
1005 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1006 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1007 | } |
1008 | |
1009 | |
1010 | setOperationAction(ISD::SELECT, MVT::v2f64, Custom); |
1011 | setOperationAction(ISD::SELECT, MVT::v2i64, Custom); |
1012 | setOperationAction(ISD::SELECT, MVT::v4i32, Custom); |
1013 | setOperationAction(ISD::SELECT, MVT::v8i16, Custom); |
1014 | setOperationAction(ISD::SELECT, MVT::v16i8, Custom); |
1015 | |
1016 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
1017 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom); |
1018 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); |
1019 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); |
1020 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); |
1021 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom); |
1022 | |
1023 | |
1024 | for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) { |
1025 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
1026 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
1027 | setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom); |
1028 | setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); |
1029 | } |
1030 | |
1031 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
1032 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); |
1033 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
1034 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom); |
1035 | |
1036 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
1037 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom); |
1038 | |
1039 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
1040 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom); |
1041 | |
1042 | |
1043 | setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); |
1044 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom); |
1045 | setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); |
1046 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom); |
1047 | |
1048 | setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); |
1049 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom); |
1050 | setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); |
1051 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom); |
1052 | |
1053 | |
1054 | |
1055 | |
1056 | setOperationAction(ISD::LOAD, MVT::v2i32, Custom); |
1057 | setOperationAction(ISD::LOAD, MVT::v4i16, Custom); |
1058 | setOperationAction(ISD::LOAD, MVT::v8i8, Custom); |
1059 | setOperationAction(ISD::STORE, MVT::v2i32, Custom); |
1060 | setOperationAction(ISD::STORE, MVT::v4i16, Custom); |
1061 | setOperationAction(ISD::STORE, MVT::v8i8, Custom); |
1062 | |
1063 | setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); |
1064 | setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); |
1065 | setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); |
1066 | if (!Subtarget.hasAVX512()) |
1067 | setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); |
1068 | |
1069 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom); |
1070 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); |
1071 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); |
1072 | |
1073 | setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); |
1074 | |
1075 | setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); |
1076 | setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); |
1077 | setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); |
1078 | setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); |
1079 | setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); |
1080 | setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); |
1081 | |
1082 | |
1083 | |
1084 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1085 | setOperationAction(ISD::SRL, VT, Custom); |
1086 | setOperationAction(ISD::SHL, VT, Custom); |
1087 | setOperationAction(ISD::SRA, VT, Custom); |
1088 | } |
1089 | |
1090 | setOperationAction(ISD::ROTL, MVT::v4i32, Custom); |
1091 | setOperationAction(ISD::ROTL, MVT::v8i16, Custom); |
1092 | |
1093 | |
1094 | |
1095 | if (!Subtarget.useAVX512Regs() && |
1096 | !(Subtarget.hasBWI() && Subtarget.hasVLX())) |
1097 | setOperationAction(ISD::ROTL, MVT::v16i8, Custom); |
1098 | |
1099 | setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); |
1100 | setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); |
1101 | setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); |
1102 | setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); |
1103 | setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); |
1104 | } |
1105 | |
1106 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { |
1107 | setOperationAction(ISD::ABS, MVT::v16i8, Legal); |
1108 | setOperationAction(ISD::ABS, MVT::v8i16, Legal); |
1109 | setOperationAction(ISD::ABS, MVT::v4i32, Legal); |
1110 | setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); |
1111 | setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); |
1112 | setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); |
1113 | setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); |
1114 | setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
1115 | |
1116 | |
1117 | setOperationAction(ISD::ADD, MVT::i16, Custom); |
1118 | setOperationAction(ISD::ADD, MVT::i32, Custom); |
1119 | setOperationAction(ISD::SUB, MVT::i16, Custom); |
1120 | setOperationAction(ISD::SUB, MVT::i32, Custom); |
1121 | } |
1122 | |
1123 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { |
1124 | for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { |
1125 | setOperationAction(ISD::FFLOOR, RoundedTy, Legal); |
1126 | setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal); |
1127 | setOperationAction(ISD::FCEIL, RoundedTy, Legal); |
1128 | setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal); |
1129 | setOperationAction(ISD::FTRUNC, RoundedTy, Legal); |
1130 | setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal); |
1131 | setOperationAction(ISD::FRINT, RoundedTy, Legal); |
1132 | setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal); |
1133 | setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); |
1134 | setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal); |
1135 | setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal); |
1136 | setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal); |
1137 | |
1138 | setOperationAction(ISD::FROUND, RoundedTy, Custom); |
1139 | } |
1140 | |
1141 | setOperationAction(ISD::SMAX, MVT::v16i8, Legal); |
1142 | setOperationAction(ISD::SMAX, MVT::v4i32, Legal); |
1143 | setOperationAction(ISD::UMAX, MVT::v8i16, Legal); |
1144 | setOperationAction(ISD::UMAX, MVT::v4i32, Legal); |
1145 | setOperationAction(ISD::SMIN, MVT::v16i8, Legal); |
1146 | setOperationAction(ISD::SMIN, MVT::v4i32, Legal); |
1147 | setOperationAction(ISD::UMIN, MVT::v8i16, Legal); |
1148 | setOperationAction(ISD::UMIN, MVT::v4i32, Legal); |
1149 | |
1150 | setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); |
1151 | setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom); |
1152 | setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom); |
1153 | |
1154 | |
1155 | setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
1156 | |
1157 | |
1158 | |
1159 | setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); |
1160 | |
1161 | |
1162 | |
1163 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1164 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); |
1165 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); |
1166 | } |
1167 | |
1168 | |
1169 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
1170 | setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); |
1171 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); |
1172 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); |
1173 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); |
1174 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); |
1175 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); |
1176 | } |
1177 | |
1178 | |
1179 | |
1180 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
1181 | |
1182 | if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) { |
1183 | |
1184 | |
1185 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom); |
1186 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom); |
1187 | |
1188 | |
1189 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom); |
1190 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom); |
1191 | } |
1192 | } |
1193 | |
1194 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) { |
1195 | setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); |
1196 | } |
1197 | |
1198 | if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { |
1199 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1200 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
1201 | setOperationAction(ISD::ROTL, VT, Custom); |
1202 | |
1203 | |
1204 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) |
1205 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1206 | |
1207 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1208 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
1209 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1210 | } |
1211 | |
1212 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) { |
1213 | bool HasInt256 = Subtarget.hasInt256(); |
1214 | |
1215 | addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1216 | : &X86::VR256RegClass); |
1217 | addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1218 | : &X86::VR256RegClass); |
1219 | addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1220 | : &X86::VR256RegClass); |
1221 | addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1222 | : &X86::VR256RegClass); |
1223 | addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1224 | : &X86::VR256RegClass); |
1225 | addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1226 | : &X86::VR256RegClass); |
1227 | |
1228 | for (auto VT : { MVT::v8f32, MVT::v4f64 }) { |
1229 | setOperationAction(ISD::FFLOOR, VT, Legal); |
1230 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
1231 | setOperationAction(ISD::FCEIL, VT, Legal); |
1232 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
1233 | setOperationAction(ISD::FTRUNC, VT, Legal); |
1234 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
1235 | setOperationAction(ISD::FRINT, VT, Legal); |
1236 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
1237 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
1238 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
1239 | setOperationAction(ISD::FROUNDEVEN, VT, Legal); |
1240 | setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); |
1241 | |
1242 | setOperationAction(ISD::FROUND, VT, Custom); |
1243 | |
1244 | setOperationAction(ISD::FNEG, VT, Custom); |
1245 | setOperationAction(ISD::FABS, VT, Custom); |
1246 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1247 | } |
1248 | |
1249 | |
1250 | |
1251 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
1252 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
1253 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
1254 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
1255 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); |
1256 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom); |
1257 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal); |
1258 | |
1259 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); |
1260 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal); |
1261 | |
1262 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal); |
1263 | setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal); |
1264 | setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal); |
1265 | setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal); |
1266 | setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal); |
1267 | setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal); |
1268 | setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal); |
1269 | setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal); |
1270 | setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal); |
1271 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); |
1272 | setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal); |
1273 | setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal); |
1274 | |
1275 | if (!Subtarget.hasAVX512()) |
1276 | setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); |
1277 | |
1278 | |
1279 | |
1280 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1281 | setOperationAction(ISD::SRL, VT, Custom); |
1282 | setOperationAction(ISD::SHL, VT, Custom); |
1283 | setOperationAction(ISD::SRA, VT, Custom); |
1284 | } |
1285 | |
1286 | |
1287 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
1288 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
1289 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
1290 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
1291 | |
1292 | setOperationAction(ISD::ROTL, MVT::v8i32, Custom); |
1293 | setOperationAction(ISD::ROTL, MVT::v16i16, Custom); |
1294 | |
1295 | |
1296 | if (!Subtarget.useBWIRegs()) |
1297 | setOperationAction(ISD::ROTL, MVT::v32i8, Custom); |
1298 | |
1299 | setOperationAction(ISD::SELECT, MVT::v4f64, Custom); |
1300 | setOperationAction(ISD::SELECT, MVT::v4i64, Custom); |
1301 | setOperationAction(ISD::SELECT, MVT::v8i32, Custom); |
1302 | setOperationAction(ISD::SELECT, MVT::v16i16, Custom); |
1303 | setOperationAction(ISD::SELECT, MVT::v32i8, Custom); |
1304 | setOperationAction(ISD::SELECT, MVT::v8f32, Custom); |
1305 | |
1306 | for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1307 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1308 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1309 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1310 | } |
1311 | |
1312 | setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); |
1313 | setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); |
1314 | setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); |
1315 | setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); |
1316 | |
1317 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1318 | setOperationAction(ISD::SETCC, VT, Custom); |
1319 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
1320 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
1321 | setOperationAction(ISD::CTPOP, VT, Custom); |
1322 | setOperationAction(ISD::CTLZ, VT, Custom); |
1323 | |
1324 | |
1325 | |
1326 | setCondCodeAction(ISD::SETLT, VT, Custom); |
1327 | setCondCodeAction(ISD::SETLE, VT, Custom); |
1328 | } |
1329 | |
1330 | if (Subtarget.hasAnyFMA()) { |
1331 | for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, |
1332 | MVT::v2f64, MVT::v4f64 }) { |
1333 | setOperationAction(ISD::FMA, VT, Legal); |
1334 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
1335 | } |
1336 | } |
1337 | |
1338 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1339 | setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); |
1340 | setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); |
1341 | } |
1342 | |
1343 | setOperationAction(ISD::MUL, MVT::v4i64, Custom); |
1344 | setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); |
1345 | setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); |
1346 | setOperationAction(ISD::MUL, MVT::v32i8, Custom); |
1347 | |
1348 | setOperationAction(ISD::MULHU, MVT::v8i32, Custom); |
1349 | setOperationAction(ISD::MULHS, MVT::v8i32, Custom); |
1350 | setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); |
1351 | setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom); |
1352 | setOperationAction(ISD::MULHU, MVT::v32i8, Custom); |
1353 | setOperationAction(ISD::MULHS, MVT::v32i8, Custom); |
1354 | |
1355 | setOperationAction(ISD::SMULO, MVT::v32i8, Custom); |
1356 | setOperationAction(ISD::UMULO, MVT::v32i8, Custom); |
1357 | |
1358 | setOperationAction(ISD::ABS, MVT::v4i64, Custom); |
1359 | setOperationAction(ISD::SMAX, MVT::v4i64, Custom); |
1360 | setOperationAction(ISD::UMAX, MVT::v4i64, Custom); |
1361 | setOperationAction(ISD::SMIN, MVT::v4i64, Custom); |
1362 | setOperationAction(ISD::UMIN, MVT::v4i64, Custom); |
1363 | |
1364 | setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1365 | setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1366 | setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1367 | setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1368 | setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1369 | setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1370 | setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1371 | setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1372 | setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom); |
1373 | setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom); |
1374 | setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom); |
1375 | setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom); |
1376 | |
1377 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { |
1378 | setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); |
1379 | setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); |
1380 | setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); |
1381 | setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); |
1382 | setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom); |
1383 | } |
1384 | |
1385 | for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { |
1386 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
1387 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
1388 | } |
1389 | |
1390 | if (HasInt256) { |
1391 | |
1392 | |
1393 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); |
1394 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom); |
1395 | |
1396 | |
1397 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
1398 | setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); |
1399 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); |
1400 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); |
1401 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); |
1402 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); |
1403 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); |
1404 | } |
1405 | } |
1406 | |
1407 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1408 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { |
1409 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
1410 | setOperationAction(ISD::MSTORE, VT, Legal); |
1411 | } |
1412 | |
1413 | |
1414 | |
1415 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1416 | MVT::v4f32, MVT::v2f64 }) { |
1417 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
1418 | } |
1419 | |
1420 | |
1421 | for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1422 | MVT::v8f32, MVT::v4f64 }) { |
1423 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1424 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1425 | setOperationAction(ISD::VSELECT, VT, Custom); |
1426 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1427 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1428 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
1429 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
1430 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1431 | setOperationAction(ISD::STORE, VT, Custom); |
1432 | } |
1433 | |
1434 | if (HasInt256) { |
1435 | setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); |
1436 | |
1437 | |
1438 | setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); |
1439 | setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); |
1440 | |
1441 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1442 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
1443 | setOperationAction(ISD::MGATHER, VT, Custom); |
1444 | } |
1445 | } |
1446 | |
1447 | |
1448 | |
1449 | |
1450 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
1451 | addRegisterClass(MVT::v1i1, &X86::VK1RegClass); |
1452 | addRegisterClass(MVT::v2i1, &X86::VK2RegClass); |
1453 | addRegisterClass(MVT::v4i1, &X86::VK4RegClass); |
1454 | addRegisterClass(MVT::v8i1, &X86::VK8RegClass); |
1455 | addRegisterClass(MVT::v16i1, &X86::VK16RegClass); |
1456 | |
1457 | setOperationAction(ISD::SELECT, MVT::v1i1, Custom); |
1458 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); |
1459 | setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); |
1460 | |
1461 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
1462 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
1463 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
1464 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
1465 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
1466 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
1467 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
1468 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
1469 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); |
1470 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); |
1471 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom); |
1472 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom); |
1473 | |
1474 | |
1475 | if (!Subtarget.hasDQI()) { |
1476 | setOperationAction(ISD::LOAD, MVT::v1i1, Custom); |
1477 | setOperationAction(ISD::LOAD, MVT::v2i1, Custom); |
1478 | setOperationAction(ISD::LOAD, MVT::v4i1, Custom); |
1479 | setOperationAction(ISD::LOAD, MVT::v8i1, Custom); |
1480 | |
1481 | setOperationAction(ISD::STORE, MVT::v1i1, Custom); |
1482 | setOperationAction(ISD::STORE, MVT::v2i1, Custom); |
1483 | setOperationAction(ISD::STORE, MVT::v4i1, Custom); |
1484 | setOperationAction(ISD::STORE, MVT::v8i1, Custom); |
1485 | } |
1486 | |
1487 | |
1488 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1489 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1490 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1491 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1492 | } |
1493 | |
1494 | for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) |
1495 | setOperationAction(ISD::VSELECT, VT, Expand); |
1496 | |
1497 | for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { |
1498 | setOperationAction(ISD::SETCC, VT, Custom); |
1499 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
1500 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
1501 | setOperationAction(ISD::SELECT, VT, Custom); |
1502 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1503 | |
1504 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1505 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1506 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1507 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1508 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1509 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1510 | } |
1511 | |
1512 | for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) |
1513 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1514 | } |
1515 | |
1516 | |
1517 | |
1518 | |
1519 | if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) { |
1520 | bool HasBWI = Subtarget.hasBWI(); |
1521 | |
1522 | addRegisterClass(MVT::v16i32, &X86::VR512RegClass); |
1523 | addRegisterClass(MVT::v16f32, &X86::VR512RegClass); |
1524 | addRegisterClass(MVT::v8i64, &X86::VR512RegClass); |
1525 | addRegisterClass(MVT::v8f64, &X86::VR512RegClass); |
1526 | addRegisterClass(MVT::v32i16, &X86::VR512RegClass); |
1527 | addRegisterClass(MVT::v64i8, &X86::VR512RegClass); |
1528 | |
1529 | for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { |
1530 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); |
1531 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); |
1532 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); |
1533 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); |
1534 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); |
1535 | if (HasBWI) |
1536 | setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); |
1537 | } |
1538 | |
1539 | for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { |
1540 | setOperationAction(ISD::FNEG, VT, Custom); |
1541 | setOperationAction(ISD::FABS, VT, Custom); |
1542 | setOperationAction(ISD::FMA, VT, Legal); |
1543 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
1544 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1545 | } |
1546 | |
1547 | for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) { |
1548 | setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32); |
1549 | setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32); |
1550 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); |
1551 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); |
1552 | } |
1553 | setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); |
1554 | setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); |
1555 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal); |
1556 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal); |
1557 | setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); |
1558 | setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); |
1559 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal); |
1560 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal); |
1561 | |
1562 | setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal); |
1563 | setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal); |
1564 | setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal); |
1565 | setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal); |
1566 | setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal); |
1567 | setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal); |
1568 | setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal); |
1569 | setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal); |
1570 | setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal); |
1571 | setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal); |
1572 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); |
1573 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal); |
1574 | |
1575 | setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); |
1576 | setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); |
1577 | setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); |
1578 | setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); |
1579 | setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); |
1580 | if (HasBWI) |
1581 | setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); |
1582 | |
1583 | |
1584 | |
1585 | |
1586 | if (!Subtarget.hasVLX()) { |
1587 | for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1588 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { |
1589 | setOperationAction(ISD::MLOAD, VT, Custom); |
1590 | setOperationAction(ISD::MSTORE, VT, Custom); |
1591 | } |
1592 | } |
1593 | |
1594 | setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal); |
1595 | setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal); |
1596 | setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom); |
1597 | setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
1598 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); |
1599 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
1600 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
1601 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); |
1602 | setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); |
1603 | setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); |
1604 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); |
1605 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
1606 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
1607 | |
1608 | if (HasBWI) { |
1609 | |
1610 | setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); |
1611 | setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); |
1612 | setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); |
1613 | } |
1614 | |
1615 | for (auto VT : { MVT::v16f32, MVT::v8f64 }) { |
1616 | setOperationAction(ISD::FFLOOR, VT, Legal); |
1617 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
1618 | setOperationAction(ISD::FCEIL, VT, Legal); |
1619 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
1620 | setOperationAction(ISD::FTRUNC, VT, Legal); |
1621 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
1622 | setOperationAction(ISD::FRINT, VT, Legal); |
1623 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
1624 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
1625 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
1626 | setOperationAction(ISD::FROUNDEVEN, VT, Legal); |
1627 | setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); |
1628 | |
1629 | setOperationAction(ISD::FROUND, VT, Custom); |
1630 | } |
1631 | |
1632 | for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) { |
1633 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
1634 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
1635 | } |
1636 | |
1637 | setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom); |
1638 | setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom); |
1639 | setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom); |
1640 | setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom); |
1641 | |
1642 | setOperationAction(ISD::MUL, MVT::v8i64, Custom); |
1643 | setOperationAction(ISD::MUL, MVT::v16i32, Legal); |
1644 | setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom); |
1645 | setOperationAction(ISD::MUL, MVT::v64i8, Custom); |
1646 | |
1647 | setOperationAction(ISD::MULHU, MVT::v16i32, Custom); |
1648 | setOperationAction(ISD::MULHS, MVT::v16i32, Custom); |
1649 | setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom); |
1650 | setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom); |
1651 | setOperationAction(ISD::MULHS, MVT::v64i8, Custom); |
1652 | setOperationAction(ISD::MULHU, MVT::v64i8, Custom); |
1653 | |
1654 | setOperationAction(ISD::SMULO, MVT::v64i8, Custom); |
1655 | setOperationAction(ISD::UMULO, MVT::v64i8, Custom); |
1656 | |
1657 | setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); |
1658 | |
1659 | for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { |
1660 | setOperationAction(ISD::SRL, VT, Custom); |
1661 | setOperationAction(ISD::SHL, VT, Custom); |
1662 | setOperationAction(ISD::SRA, VT, Custom); |
1663 | setOperationAction(ISD::SETCC, VT, Custom); |
1664 | |
1665 | |
1666 | |
1667 | setCondCodeAction(ISD::SETLT, VT, Custom); |
1668 | setCondCodeAction(ISD::SETLE, VT, Custom); |
1669 | } |
1670 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
1671 | setOperationAction(ISD::SMAX, VT, Legal); |
1672 | setOperationAction(ISD::UMAX, VT, Legal); |
1673 | setOperationAction(ISD::SMIN, VT, Legal); |
1674 | setOperationAction(ISD::UMIN, VT, Legal); |
1675 | setOperationAction(ISD::ABS, VT, Legal); |
1676 | setOperationAction(ISD::CTPOP, VT, Custom); |
1677 | setOperationAction(ISD::ROTL, VT, Custom); |
1678 | setOperationAction(ISD::ROTR, VT, Custom); |
1679 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
1680 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
1681 | } |
1682 | |
1683 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
1684 | setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom); |
1685 | setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom); |
1686 | setOperationAction(ISD::CTLZ, VT, Custom); |
1687 | setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom); |
1688 | setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom); |
1689 | setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom); |
1690 | setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom); |
1691 | setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom); |
1692 | setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom); |
1693 | setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom); |
1694 | setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom); |
1695 | } |
1696 | |
1697 | if (Subtarget.hasDQI()) { |
1698 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); |
1699 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); |
1700 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal); |
1701 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal); |
1702 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); |
1703 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); |
1704 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal); |
1705 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal); |
1706 | |
1707 | setOperationAction(ISD::MUL, MVT::v8i64, Legal); |
1708 | } |
1709 | |
1710 | if (Subtarget.hasCDI()) { |
1711 | |
1712 | for (auto VT : { MVT::v16i32, MVT::v8i64} ) { |
1713 | setOperationAction(ISD::CTLZ, VT, Legal); |
1714 | } |
1715 | } |
1716 | |
1717 | if (Subtarget.hasVPOPCNTDQ()) { |
1718 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) |
1719 | setOperationAction(ISD::CTPOP, VT, Legal); |
1720 | } |
1721 | |
1722 | |
1723 | |
1724 | |
1725 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1726 | MVT::v8f32, MVT::v4f64 }) |
1727 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
1728 | |
1729 | for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, |
1730 | MVT::v16f32, MVT::v8f64 }) { |
1731 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1732 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
1733 | setOperationAction(ISD::SELECT, VT, Custom); |
1734 | setOperationAction(ISD::VSELECT, VT, Custom); |
1735 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1736 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1737 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1738 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
1739 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1740 | } |
1741 | |
1742 | for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { |
1743 | setOperationAction(ISD::MLOAD, VT, Legal); |
1744 | setOperationAction(ISD::MSTORE, VT, Legal); |
1745 | setOperationAction(ISD::MGATHER, VT, Custom); |
1746 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1747 | } |
1748 | if (HasBWI) { |
1749 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
1750 | setOperationAction(ISD::MLOAD, VT, Legal); |
1751 | setOperationAction(ISD::MSTORE, VT, Legal); |
1752 | } |
1753 | } else { |
1754 | setOperationAction(ISD::STORE, MVT::v32i16, Custom); |
1755 | setOperationAction(ISD::STORE, MVT::v64i8, Custom); |
1756 | } |
1757 | |
1758 | if (Subtarget.hasVBMI2()) { |
1759 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1760 | MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1761 | MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { |
1762 | setOperationAction(ISD::FSHL, VT, Custom); |
1763 | setOperationAction(ISD::FSHR, VT, Custom); |
1764 | } |
1765 | |
1766 | setOperationAction(ISD::ROTL, MVT::v32i16, Custom); |
1767 | setOperationAction(ISD::ROTR, MVT::v8i16, Custom); |
1768 | setOperationAction(ISD::ROTR, MVT::v16i16, Custom); |
1769 | setOperationAction(ISD::ROTR, MVT::v32i16, Custom); |
1770 | } |
1771 | } |
1772 | |
1773 | |
1774 | |
1775 | |
1776 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
1777 | |
1778 | |
1779 | |
1780 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, |
1781 | Subtarget.hasVLX() ? Legal : Custom); |
1782 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, |
1783 | Subtarget.hasVLX() ? Legal : Custom); |
1784 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, |
1785 | Subtarget.hasVLX() ? Legal : Custom); |
1786 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, |
1787 | Subtarget.hasVLX() ? Legal : Custom); |
1788 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); |
1789 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, |
1790 | Subtarget.hasVLX() ? Legal : Custom); |
1791 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, |
1792 | Subtarget.hasVLX() ? Legal : Custom); |
1793 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, |
1794 | Subtarget.hasVLX() ? Legal : Custom); |
1795 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, |
1796 | Subtarget.hasVLX() ? Legal : Custom); |
1797 | |
1798 | if (Subtarget.hasDQI()) { |
1799 | |
1800 | |
1801 | assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && |
1802 | isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && |
1803 | "Unexpected operation action!"); |
1804 | |
1805 | setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); |
1806 | setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); |
1807 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom); |
1808 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom); |
1809 | } |
1810 | |
1811 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
1812 | setOperationAction(ISD::SMAX, VT, Legal); |
1813 | setOperationAction(ISD::UMAX, VT, Legal); |
1814 | setOperationAction(ISD::SMIN, VT, Legal); |
1815 | setOperationAction(ISD::UMIN, VT, Legal); |
1816 | setOperationAction(ISD::ABS, VT, Legal); |
1817 | } |
1818 | |
1819 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
1820 | setOperationAction(ISD::ROTL, VT, Custom); |
1821 | setOperationAction(ISD::ROTR, VT, Custom); |
1822 | } |
1823 | |
1824 | |
1825 | setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); |
1826 | setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); |
1827 | |
1828 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1829 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
1830 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1831 | |
1832 | if (Subtarget.hasDQI()) { |
1833 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
1834 | setOperationAction(ISD::SINT_TO_FP, VT, |
1835 | Subtarget.hasVLX() ? Legal : Custom); |
1836 | setOperationAction(ISD::UINT_TO_FP, VT, |
1837 | Subtarget.hasVLX() ? Legal : Custom); |
1838 | setOperationAction(ISD::STRICT_SINT_TO_FP, VT, |
1839 | Subtarget.hasVLX() ? Legal : Custom); |
1840 | setOperationAction(ISD::STRICT_UINT_TO_FP, VT, |
1841 | Subtarget.hasVLX() ? Legal : Custom); |
1842 | setOperationAction(ISD::FP_TO_SINT, VT, |
1843 | Subtarget.hasVLX() ? Legal : Custom); |
1844 | setOperationAction(ISD::FP_TO_UINT, VT, |
1845 | Subtarget.hasVLX() ? Legal : Custom); |
1846 | setOperationAction(ISD::STRICT_FP_TO_SINT, VT, |
1847 | Subtarget.hasVLX() ? Legal : Custom); |
1848 | setOperationAction(ISD::STRICT_FP_TO_UINT, VT, |
1849 | Subtarget.hasVLX() ? Legal : Custom); |
1850 | setOperationAction(ISD::MUL, VT, Legal); |
1851 | } |
1852 | } |
1853 | |
1854 | if (Subtarget.hasCDI()) { |
1855 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
1856 | setOperationAction(ISD::CTLZ, VT, Legal); |
1857 | } |
1858 | } |
1859 | |
1860 | if (Subtarget.hasVPOPCNTDQ()) { |
1861 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) |
1862 | setOperationAction(ISD::CTPOP, VT, Legal); |
1863 | } |
1864 | } |
1865 | |
1866 | |
1867 | |
1868 | |
1869 | if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { |
1870 | addRegisterClass(MVT::v32i1, &X86::VK32RegClass); |
1871 | addRegisterClass(MVT::v64i1, &X86::VK64RegClass); |
1872 | |
1873 | for (auto VT : { MVT::v32i1, MVT::v64i1 }) { |
1874 | setOperationAction(ISD::VSELECT, VT, Expand); |
1875 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1876 | setOperationAction(ISD::SETCC, VT, Custom); |
1877 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1878 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1879 | setOperationAction(ISD::SELECT, VT, Custom); |
1880 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1881 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1882 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1883 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1884 | } |
1885 | |
1886 | for (auto VT : { MVT::v16i1, MVT::v32i1 }) |
1887 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1888 | |
1889 | |
1890 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); |
1891 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); |
1892 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); |
1893 | |
1894 | for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { |
1895 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
1896 | setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); |
1897 | } |
1898 | |
1899 | |
1900 | |
1901 | |
1902 | |
1903 | if (Subtarget.hasBITALG()) { |
1904 | for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) |
1905 | setOperationAction(ISD::CTPOP, VT, Legal); |
1906 | } |
1907 | } |
1908 | |
1909 | if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) { |
1910 | auto setGroup = [&] (MVT VT) { |
1911 | setOperationAction(ISD::FADD, VT, Legal); |
1912 | setOperationAction(ISD::STRICT_FADD, VT, Legal); |
1913 | setOperationAction(ISD::FSUB, VT, Legal); |
1914 | setOperationAction(ISD::STRICT_FSUB, VT, Legal); |
1915 | setOperationAction(ISD::FMUL, VT, Legal); |
1916 | setOperationAction(ISD::STRICT_FMUL, VT, Legal); |
1917 | setOperationAction(ISD::FDIV, VT, Legal); |
1918 | setOperationAction(ISD::STRICT_FDIV, VT, Legal); |
1919 | setOperationAction(ISD::FSQRT, VT, Legal); |
1920 | setOperationAction(ISD::STRICT_FSQRT, VT, Legal); |
1921 | |
1922 | setOperationAction(ISD::FFLOOR, VT, Legal); |
1923 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
1924 | setOperationAction(ISD::FCEIL, VT, Legal); |
1925 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
1926 | setOperationAction(ISD::FTRUNC, VT, Legal); |
1927 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
1928 | setOperationAction(ISD::FRINT, VT, Legal); |
1929 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
1930 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
1931 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
1932 | |
1933 | setOperationAction(ISD::LOAD, VT, Legal); |
1934 | setOperationAction(ISD::STORE, VT, Legal); |
1935 | |
1936 | setOperationAction(ISD::FMA, VT, Legal); |
1937 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
1938 | setOperationAction(ISD::VSELECT, VT, Legal); |
1939 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1940 | setOperationAction(ISD::SELECT, VT, Custom); |
1941 | |
1942 | setOperationAction(ISD::FNEG, VT, Custom); |
1943 | setOperationAction(ISD::FABS, VT, Custom); |
1944 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1945 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1946 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1947 | }; |
1948 | |
1949 | |
1950 | setGroup(MVT::f16); |
1951 | addRegisterClass(MVT::f16, &X86::FR16XRegClass); |
1952 | setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); |
1953 | setOperationAction(ISD::BR_CC, MVT::f16, Expand); |
1954 | setOperationAction(ISD::SETCC, MVT::f16, Custom); |
1955 | setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); |
1956 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); |
1957 | setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); |
1958 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); |
1959 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); |
1960 | if (isTypeLegal(MVT::f80)) { |
1961 | setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom); |
1962 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom); |
1963 | } |
1964 | |
1965 | setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); |
1966 | setCondCodeAction(ISD::SETUNE, MVT::f16, Expand); |
1967 | |
1968 | if (Subtarget.useAVX512Regs()) { |
1969 | setGroup(MVT::v32f16); |
1970 | addRegisterClass(MVT::v32f16, &X86::VR512RegClass); |
1971 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom); |
1972 | setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal); |
1973 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal); |
1974 | setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal); |
1975 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal); |
1976 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal); |
1977 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); |
1978 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom); |
1979 | |
1980 | setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom); |
1981 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom); |
1982 | setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom); |
1983 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom); |
1984 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16); |
1985 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8, |
1986 | MVT::v32i16); |
1987 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16); |
1988 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8, |
1989 | MVT::v32i16); |
1990 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16); |
1991 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1, |
1992 | MVT::v32i16); |
1993 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16); |
1994 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1, |
1995 | MVT::v32i16); |
1996 | |
1997 | setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal); |
1998 | setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal); |
1999 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom); |
2000 | |
2001 | setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal); |
2002 | setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal); |
2003 | |
2004 | setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom); |
2005 | setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom); |
2006 | } |
2007 | |
2008 | if (Subtarget.hasVLX()) { |
2009 | addRegisterClass(MVT::v8f16, &X86::VR128XRegClass); |
2010 | addRegisterClass(MVT::v16f16, &X86::VR256XRegClass); |
2011 | setGroup(MVT::v8f16); |
2012 | setGroup(MVT::v16f16); |
2013 | |
2014 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal); |
2015 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom); |
2016 | setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal); |
2017 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal); |
2018 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal); |
2019 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal); |
2020 | setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal); |
2021 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal); |
2022 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal); |
2023 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal); |
2024 | |
2025 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); |
2026 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom); |
2027 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); |
2028 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom); |
2029 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal); |
2030 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); |
2031 | |
2032 | |
2033 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom); |
2034 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom); |
2035 | |
2036 | setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal); |
2037 | setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal); |
2038 | setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom); |
2039 | |
2040 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal); |
2041 | setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal); |
2042 | setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal); |
2043 | setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal); |
2044 | |
2045 | |
2046 | setOperationAction(ISD::LOAD, MVT::v4f16, Custom); |
2047 | setOperationAction(ISD::STORE, MVT::v4f16, Custom); |
2048 | } |
2049 | |
2050 | |
2051 | addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf())); |
2052 | } |
2053 | |
2054 | if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { |
2055 | setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); |
2056 | setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); |
2057 | setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); |
2058 | setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); |
2059 | setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); |
2060 | |
2061 | setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); |
2062 | setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); |
2063 | setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); |
2064 | setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); |
2065 | setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); |
2066 | |
2067 | if (Subtarget.hasBWI()) { |
2068 | setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); |
2069 | setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); |
2070 | } |
2071 | |
2072 | if (Subtarget.hasFP16()) { |
2073 | |
2074 | setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom); |
2075 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom); |
2076 | setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom); |
2077 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom); |
2078 | setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom); |
2079 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom); |
2080 | setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom); |
2081 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom); |
2082 | |
2083 | setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom); |
2084 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom); |
2085 | setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom); |
2086 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom); |
2087 | setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom); |
2088 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom); |
2089 | setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom); |
2090 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom); |
2091 | |
2092 | setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom); |
2093 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom); |
2094 | setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom); |
2095 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom); |
2096 | |
2097 | setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom); |
2098 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom); |
2099 | setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom); |
2100 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom); |
2101 | } |
2102 | |
2103 | setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); |
2104 | setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); |
2105 | setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
2106 | } |
2107 | |
2108 | if (Subtarget.hasAMXTILE()) { |
2109 | addRegisterClass(MVT::x86amx, &X86::TILERegClass); |
2110 | } |
2111 | |
2112 | |
2113 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
2114 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
2115 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
2116 | if (!Subtarget.is64Bit()) { |
2117 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
2118 | } |
2119 | |
2120 | |
2121 | |
2122 | |
2123 | |
2124 | |
2125 | |
2126 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
2127 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
2128 | continue; |
2129 | |
2130 | setOperationAction(ISD::SADDO, VT, Custom); |
2131 | setOperationAction(ISD::UADDO, VT, Custom); |
2132 | setOperationAction(ISD::SSUBO, VT, Custom); |
2133 | setOperationAction(ISD::USUBO, VT, Custom); |
2134 | setOperationAction(ISD::SMULO, VT, Custom); |
2135 | setOperationAction(ISD::UMULO, VT, Custom); |
2136 | |
2137 | |
2138 | setOperationAction(ISD::ADDCARRY, VT, Custom); |
2139 | setOperationAction(ISD::SUBCARRY, VT, Custom); |
2140 | setOperationAction(ISD::SETCCCARRY, VT, Custom); |
2141 | setOperationAction(ISD::SADDO_CARRY, VT, Custom); |
2142 | setOperationAction(ISD::SSUBO_CARRY, VT, Custom); |
2143 | } |
2144 | |
2145 | if (!Subtarget.is64Bit()) { |
2146 | |
2147 | setLibcallName(RTLIB::SHL_I128, nullptr); |
2148 | setLibcallName(RTLIB::SRL_I128, nullptr); |
2149 | setLibcallName(RTLIB::SRA_I128, nullptr); |
2150 | setLibcallName(RTLIB::MUL_I128, nullptr); |
2151 | setLibcallName(RTLIB::MULO_I128, nullptr); |
2152 | } |
2153 | |
2154 | |
2155 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
2156 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
2157 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
2158 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
2159 | } |
2160 | |
2161 | if (Subtarget.isTargetWin64()) { |
2162 | setOperationAction(ISD::SDIV, MVT::i128, Custom); |
2163 | setOperationAction(ISD::UDIV, MVT::i128, Custom); |
2164 | setOperationAction(ISD::SREM, MVT::i128, Custom); |
2165 | setOperationAction(ISD::UREM, MVT::i128, Custom); |
2166 | } |
2167 | |
2168 | |
2169 | |
2170 | |
2171 | |
2172 | if (Subtarget.is32Bit() && |
2173 | (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium())) |
2174 | for (ISD::NodeType Op : |
2175 | {ISD::FCEIL, ISD::STRICT_FCEIL, |
2176 | ISD::FCOS, ISD::STRICT_FCOS, |
2177 | ISD::FEXP, ISD::STRICT_FEXP, |
2178 | ISD::FFLOOR, ISD::STRICT_FFLOOR, |
2179 | ISD::FREM, ISD::STRICT_FREM, |
2180 | ISD::FLOG, ISD::STRICT_FLOG, |
2181 | ISD::FLOG10, ISD::STRICT_FLOG10, |
2182 | ISD::FPOW, ISD::STRICT_FPOW, |
2183 | ISD::FSIN, ISD::STRICT_FSIN}) |
2184 | if (isOperationExpand(Op, MVT::f32)) |
2185 | setOperationAction(Op, MVT::f32, Promote); |
2186 | |
2187 | |
2188 | setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
2189 | setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); |
2190 | setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
2191 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
2192 | setTargetDAGCombine(ISD::CONCAT_VECTORS); |
2193 | setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
2194 | setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); |
2195 | setTargetDAGCombine(ISD::BITCAST); |
2196 | setTargetDAGCombine(ISD::VSELECT); |
2197 | setTargetDAGCombine(ISD::SELECT); |
2198 | setTargetDAGCombine(ISD::SHL); |
2199 | setTargetDAGCombine(ISD::SRA); |
2200 | setTargetDAGCombine(ISD::SRL); |
2201 | setTargetDAGCombine(ISD::OR); |
2202 | setTargetDAGCombine(ISD::AND); |
2203 | setTargetDAGCombine(ISD::ADD); |
2204 | setTargetDAGCombine(ISD::FADD); |
2205 | setTargetDAGCombine(ISD::FSUB); |
2206 | setTargetDAGCombine(ISD::FNEG); |
2207 | setTargetDAGCombine(ISD::FMA); |
2208 | setTargetDAGCombine(ISD::STRICT_FMA); |
2209 | setTargetDAGCombine(ISD::FMINNUM); |
2210 | setTargetDAGCombine(ISD::FMAXNUM); |
2211 | setTargetDAGCombine(ISD::SUB); |
2212 | setTargetDAGCombine(ISD::LOAD); |
2213 | setTargetDAGCombine(ISD::MLOAD); |
2214 | setTargetDAGCombine(ISD::STORE); |
2215 | setTargetDAGCombine(ISD::MSTORE); |
2216 | setTargetDAGCombine(ISD::TRUNCATE); |
2217 | setTargetDAGCombine(ISD::ZERO_EXTEND); |
2218 | setTargetDAGCombine(ISD::ANY_EXTEND); |
2219 | setTargetDAGCombine(ISD::SIGN_EXTEND); |
2220 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
2221 | setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG); |
2222 | setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); |
2223 | setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); |
2224 | setTargetDAGCombine(ISD::SINT_TO_FP); |
2225 | setTargetDAGCombine(ISD::UINT_TO_FP); |
2226 | setTargetDAGCombine(ISD::STRICT_SINT_TO_FP); |
2227 | setTargetDAGCombine(ISD::STRICT_UINT_TO_FP); |
2228 | setTargetDAGCombine(ISD::SETCC); |
2229 | setTargetDAGCombine(ISD::MUL); |
2230 | setTargetDAGCombine(ISD::XOR); |
2231 | setTargetDAGCombine(ISD::MSCATTER); |
2232 | setTargetDAGCombine(ISD::MGATHER); |
2233 | setTargetDAGCombine(ISD::FP16_TO_FP); |
2234 | setTargetDAGCombine(ISD::FP_EXTEND); |
2235 | setTargetDAGCombine(ISD::STRICT_FP_EXTEND); |
2236 | setTargetDAGCombine(ISD::FP_ROUND); |
2237 | |
2238 | computeRegisterProperties(Subtarget.getRegisterInfo()); |
2239 | |
2240 | MaxStoresPerMemset = 16; |
2241 | MaxStoresPerMemsetOptSize = 8; |
2242 | MaxStoresPerMemcpy = 8; |
2243 | MaxStoresPerMemcpyOptSize = 4; |
2244 | MaxStoresPerMemmove = 8; |
2245 | MaxStoresPerMemmoveOptSize = 4; |
2246 | |
2247 | |
2248 | |
2249 | |
2250 | MaxLoadsPerMemcmp = 2; |
2251 | MaxLoadsPerMemcmpOptSize = 2; |
2252 | |
2253 | |
2254 | setPrefLoopAlignment(Align(16)); |
2255 | |
2256 | |
2257 | |
2258 | PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); |
2259 | EnableExtLdPromotion = true; |
2260 | setPrefFunctionAlignment(Align(16)); |
2261 | |
2262 | verifyIntrinsicTables(); |
2263 | |
2264 | |
2265 | IsStrictFPEnabled = true; |
2266 | } |
2267 | |
2268 | |
2269 | bool X86TargetLowering::useLoadStackGuardNode() const { |
2270 | return Subtarget.isTargetMachO() && Subtarget.is64Bit(); |
2271 | } |
2272 | |
2273 | bool X86TargetLowering::useStackGuardXorFP() const { |
2274 | |
2275 | return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO(); |
2276 | } |
2277 | |
2278 | SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
2279 | const SDLoc &DL) const { |
2280 | EVT PtrTy = getPointerTy(DAG.getDataLayout()); |
2281 | unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; |
2282 | MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); |
2283 | return SDValue(Node, 0); |
2284 | } |
2285 | |
// Decide how an illegal vector type should be legalized on x86.
TargetLoweringBase::LegalizeTypeAction
X86TargetLowering::getPreferredVectorAction(MVT VT) const {
  // v32i1/v64i1 masks without BWI cannot live in a single k-register:
  // split them instead of widening.
  if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
      !Subtarget.hasBWI())
    return TypeSplitVector;

  // Prefer widening for other fixed, multi-element, non-mask vectors;
  // x86 register sizes are powers of two, so widening is usually cheapest.
  if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
      VT.getVectorElementType() != MVT::i1)
    return TypeWidenVector;

  // Everything else (single-element vectors, i1 vectors, scalable) uses the
  // generic policy.
  return TargetLoweringBase::getPreferredVectorAction(VT);
}
2298 | |
/// Map an i1 mask vector of \p NumElts elements to the {register type,
/// register count} used to pass it under calling convention \p CC on an
/// AVX512 target. Returns {INVALID_SIMPLE_VALUE_TYPE, 0} when the generic
/// type breakdown should be used instead.
static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // Small masks are promoted to wider integer vectors. RegCall and the
  // Intel OCL built-in convention are excluded for v8i1/v16i1 because they
  // pass those in mask registers.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};

  // v32i1 goes in a 256-bit vector unless RegCall on a BWI target can keep it
  // in a k-register.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};

  // v64i1 requires BWI: one ZMM when 512-bit registers are preferred,
  // otherwise two YMM halves.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Odd element counts, v64i1 without BWI, and anything over 64 elements are
  // scalarized: one i8 register per mask bit.
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  // No special handling; caller falls back to the default breakdown.
  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}
2332 | |
// Return the register type used to pass \p VT under \p CC, overriding the
// generic rule for AVX512 mask vectors and v3f16.
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  // i1 mask vectors on AVX512 have a bespoke ABI mapping; see
  // handleMaskRegisterForCallingConv.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512()) {
    unsigned NumElts = VT.getVectorNumElements();

    MVT RegisterVT;
    unsigned NumRegisters;
    std::tie(RegisterVT, NumRegisters) =
        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
      return RegisterVT;
  }

  // v3f16 is passed widened in a single XMM (v8f16) rather than split into
  // scalars. Must agree with getNumRegistersForCallingConv below.
  if (VT == MVT::v3f16 && Subtarget.hasFP16())
    return MVT::v8f16;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
2355 | |
// Return how many registers are used to pass \p VT under \p CC; mirrors
// getRegisterTypeForCallingConv and must stay in sync with it.
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  // AVX512 i1 mask vectors use the same custom mapping as the register type
  // query above.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512()) {
    unsigned NumElts = VT.getVectorNumElements();

    MVT RegisterVT;
    unsigned NumRegisters;
    std::tie(RegisterVT, NumRegisters) =
        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
      return NumRegisters;
  }

  // v3f16 is widened into one v8f16 register (see
  // getRegisterTypeForCallingConv), so exactly one register is used.
  if (VT == MVT::v3f16 && Subtarget.hasFP16())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
2378 | |
// Break an illegal vector type into intermediate/register pieces for argument
// passing. Overrides the generic breakdown for AVX512 mask vectors; the
// conditions here must stay in sync with handleMaskRegisterForCallingConv.
unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Scalarized masks: non-power-of-2 counts, v64i1 without BWI, or more than
  // 64 elements become one i1 intermediate / i8 register per element.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // v64i1 with BWI but without preferred 512-bit registers is split into two
  // v32i1 halves carried in YMM registers (RegCall keeps its own scheme).
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
                                              NumIntermediates, RegisterVT);
}
2406 | |
// Return the value type produced by a SETCC for operands of type \p VT.
// Scalars compare into i8 (the width of x86 setcc); vectors usually compare
// element-wise into same-sized integers, except where AVX512 mask registers
// make a vXi1 result natural.
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext& Context,
                                          EVT VT) const {
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Walk the legalization chain to see what register type VT will
    // eventually live in.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // 512-bit operands always compare into mask registers.
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // With VLX, 128/256-bit compares can also target mask registers, but
      // byte/word elements additionally require BWI; dword/qword elements
      // (>= 32 bits) are always fine.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  // Default: element-wise integer result of the same width as the operands.
  return VT.changeVectorElementTypeToInteger();
}
2435 | |
2436 | |
2437 | |
// Recursively compute the maximum alignment (capped at 16) that a byval
// aggregate of type \p Ty benefits from: any 128-bit vector nested inside
// raises the requirement to 16 so SSE loads/stores stay aligned.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
  // 16 is the cap; stop early once reached.
  if (MaxAlign == 16)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    // Arrays inherit their element's requirement.
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    // Structs take the max over their members, stopping at the cap.
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
}
2460 | |
2461 | |
2462 | |
2463 | |
2464 | |
// Return the desired alignment (in bytes) for a byval argument of type
// \p Ty. 64-bit follows the ABI alignment with a minimum of 8; 32-bit
// defaults to 4 but is bumped to 16 when SSE vectors are embedded in the
// aggregate (see getMaxByValAlign).
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  if (Subtarget.is64Bit()) {
    // Max of the ABI alignment and the 8-byte x86-64 stack slot minimum.
    Align TyAlign = DL.getABITypeAlign(Ty);
    if (TyAlign > 8)
      return TyAlign.value();
    return 8;
  }

  Align Alignment(4);
  if (Subtarget.hasSSE1())
    getMaxByValAlign(Ty, Alignment);
  return Alignment.value();
}
2480 | |
2481 | |
2482 | |
2483 | |
2484 | |
// Choose the widest store/load type worth using for an inlined
// memset/memcpy/memmove, given the operation's size/alignment and the
// function's attributes. Branch order is significant: widest profitable
// vector first, then scalar fallbacks.
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  // Vector types are only allowed when the function doesn't forbid
  // implicit FP/vector usage.
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // 512-bit ops when AVX512 is available and wide vectors are preferred.
      if (Op.size() >= 64 && Subtarget.hasAVX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // 256-bit ops with AVX.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          (Subtarget.getPreferVectorWidth() >= 256)) {
        return MVT::v32i8;
      }
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;

      // SSE1-only: use v4f32 (the x87 check guards spills of the value on
      // 32-bit; see the Subtarget predicate combination).
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // On 32-bit with SSE2, an 8-byte f64 move beats two 4-byte integer
      // moves for plain copies and zero-memsets. Excluded for memcpys whose
      // source is a constant string, where byte-exact values matter.
      return MVT::f64;
    }
  }

  // Scalar fallback: native word size.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}
2530 | |
2531 | bool X86TargetLowering::isSafeMemOpType(MVT VT) const { |
2532 | if (VT == MVT::f32) |
2533 | return X86ScalarSSEf32; |
2534 | if (VT == MVT::f64) |
2535 | return X86ScalarSSEf64; |
2536 | return true; |
2537 | } |
2538 | |
// Report whether a misaligned access of \p VT is allowed, and optionally
// whether it is fast on this subtarget.
bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Fast) {
    switch (VT.getSizeInBits()) {
    default:
      // Scalar misaligned accesses are always fast on x86.
      *Fast = true;
      break;
    case 128:
      *Fast = !Subtarget.isUnalignedMem16Slow();
      break;
    case 256:
      *Fast = !Subtarget.isUnalignedMem32Slow();
      break;
      // TODO(review): no 512-bit case — presumably falls in `default` and is
      // treated as fast; confirm that matches AVX512 hardware behavior.
    }
  }

  // Non-temporal vector accesses have stricter rules: NT stores require
  // alignment, and NT loads require SSE4.1 — a misaligned (or pre-SSE41)
  // NT load is only "allowed" in the sense that it degrades to a regular
  // load.
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    return false;
  }

  // Everything else is architecturally permitted.
  return true;
}
2570 | |
2571 | |
2572 | |
2573 | |
2574 | unsigned X86TargetLowering::getJumpTableEncoding() const { |
2575 | |
2576 | |
2577 | if (isPositionIndependent() && Subtarget.isPICStyleGOT()) |
2578 | return MachineJumpTableInfo::EK_Custom32; |
2579 | |
2580 | |
2581 | return TargetLowering::getJumpTableEncoding(); |
2582 | } |
2583 | |
2584 | bool X86TargetLowering::useSoftFloat() const { |
2585 | return Subtarget.useSoftFloat(); |
2586 | } |
2587 | |
// For 32-bit C / stdcall libcalls, mark leading integer/pointer arguments
// as "inreg" up to the module's register-parameter budget (pointless on
// 64-bit, where the ABI already passes args in registers).
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {
  if (Subtarget.is64Bit())
    return;
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  // Budget of register parameters comes from module metadata.
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Greedily assign registers to int/pointer args in order, consuming two
  // registers for 64-bit values; stop at the first arg that doesn't fit.
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}
2615 | |
// Emit a custom jump-table entry for GOT-style PIC: the block address as a
// @GOTOFF offset (paired with the GlobalBaseReg base from
// getPICJumpTableRelocBase).
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid,MCContext &Ctx) const{
  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
  // MBB address relative to the GOT base.
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
2626 | |
2627 | |
2628 | SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
2629 | SelectionDAG &DAG) const { |
2630 | if (!Subtarget.is64Bit()) |
2631 | |
2632 | |
2633 | return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), |
2634 | getPointerTy(DAG.getDataLayout())); |
2635 | return Table; |
2636 | } |
2637 | |
2638 | |
2639 | |
2640 | const MCExpr *X86TargetLowering:: |
2641 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, |
2642 | MCContext &Ctx) const { |
2643 | |
2644 | if (Subtarget.isPICStyleRIPRel()) |
2645 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
2646 | |
2647 | |
2648 | return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); |
2649 | } |
2650 | |
// Pick the register class used to represent \p VT for register-pressure
// estimation (not for actual allocation).
std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    // All scalar integers share the GPR file.
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
  case MVT::v8f32: case MVT::v4f64:
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
  case MVT::v16f32: case MVT::v8f64:
    // All FP/vector types share the XMM/YMM/ZMM file; VR128X stands in for
    // the whole family for pressure purposes.
    RRC = &X86::VR128XRegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}
2677 | |
2678 | unsigned X86TargetLowering::getAddressSpace() const { |
2679 | if (Subtarget.is64Bit()) |
2680 | return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; |
2681 | return 256; |
2682 | } |
2683 | |
2684 | static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { |
2685 | return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || |
2686 | (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); |
2687 | } |
2688 | |
// Build a constant pointer-to-pointer at \p Offset inside the segment named
// by \p AddressSpace (FS/GS), i.e. `(i8* addrspace(AS)*) Offset`.
static Constant* SegmentOffset(IRBuilderBase &IRB,
                               int Offset, unsigned AddressSpace) {
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
}
2695 | |
// Return an IR value addressing the stack-protector guard, using a fixed
// TLS segment slot on targets that provide one; otherwise fall back to the
// generic __stack_chk_guard global.
Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
    if (Subtarget.isTargetFuchsia()) {
      // Fuchsia ABI: guard at fixed offset 0x10 from the thread pointer.
      return SegmentOffset(IRB, 0x10, getAddressSpace());
    } else {
      unsigned AddressSpace = getAddressSpace();
      Module *M = IRB.GetInsertBlock()->getParent()->getParent();
      // Module flags may override the offset (and segment register below).
      int Offset = M->getStackProtectorGuardOffset();
      // INT_MAX means "unset": use the glibc/Android defaults
      // (%fs:0x28 on 64-bit, %gs:0x14 on 32-bit).
      if (Offset == INT_MAX)
        Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;

      StringRef GuardReg = M->getStackProtectorGuardReg();
      if (GuardReg == "fs")
        AddressSpace = X86AS::FS;
      else if (GuardReg == "gs")
        AddressSpace = X86AS::GS;
      return SegmentOffset(IRB, Offset, AddressSpace);
    }
  }
  return TargetLowering::getIRStackGuard(IRB);
}
2725 | |
// Declare the module-level entities stack-protector codegen will reference.
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
  // MSVC-style environments use the CRT's __security_cookie global and
  // __security_check_cookie check function instead of __stack_chk_*.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    M.getOrInsertGlobal("__security_cookie",
                        Type::getInt8PtrTy(M.getContext()));

    // void __security_check_cookie(i8* inreg) — fastcall per the MSVC ABI.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        Type::getInt8PtrTy(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }

  StringRef GuardMode = M.getStackProtectorGuard();

  // TLS-slot targets need no declarations; the guard lives at a fixed
  // segment offset (see getIRStackGuard).
  if ((GuardMode == "tls" || GuardMode.empty()) &&
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
    return;
  TargetLowering::insertSSPDeclarations(M);
}
2753 | |
2754 | Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { |
2755 | |
2756 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2757 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2758 | return M.getGlobalVariable("__security_cookie"); |
2759 | } |
2760 | return TargetLowering::getSDagStackGuard(M); |
2761 | } |
2762 | |
2763 | Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
2764 | |
2765 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2766 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2767 | return M.getFunction("__security_check_cookie"); |
2768 | } |
2769 | return TargetLowering::getSSPStackGuardCheck(M); |
2770 | } |
2771 | |
// Return where the SafeStack unsafe-stack pointer is stored for this target.
Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Contiki keeps it in the default (non-TLS) location.
  if (Subtarget.getTargetTriple().isOSContiki())
    return getDefaultSafeStackPointerLocation(IRB, false);

  // Android: fixed TLS slot relative to the thread pointer
  // (0x48 on 64-bit, 0x24 on 32-bit).
  if (Subtarget.isTargetAndroid()) {
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
  }

  // Fuchsia ABI: slot at offset 0x18 from the thread pointer.
  if (Subtarget.isTargetFuchsia()) {
    return SegmentOffset(IRB, 0x18, getAddressSpace());
  }

  return TargetLowering::getSafeStackPointerLocation(IRB);
}
2795 | |
2796 | |
2797 | |
2798 | |
2799 | |
// Check whether the given return values can all be lowered in registers per
// the X86 return convention (if not, sret demotion kicks in).
bool X86TargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_X86);
}
2807 | |
// Scratch registers available to frame lowering regardless of calling
// convention. Null-terminated array; R11 is never used for argument passing.
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
  return ScratchRegs;
}
2812 | |
2813 | |
2814 | |
/// Lower an i1 mask vector \p ValArg into the scalar register type \p ValLoc
/// required by the calling convention (bitcast + optional extend).
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
                               const SDLoc &Dl, SelectionDAG &DAG) {
  EVT ValVT = ValArg.getValueType();

  // A single-bit mask is just its element.
  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, Dl));

  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
    // Bitcast to the same-width integer first, then widen to i32 if the
    // location demands it (a direct bitcast to i32 would be invalid).
    EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
    return ValToCopy;
  }

  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
    // Same width: a plain bitcast suffices.
    return DAG.getBitcast(ValLoc, ValArg);
  }

  // Fallback for remaining mask/location combinations.
  return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
}
2844 | |
2845 | |
/// Split a v64i1 value (already bitcast-compatible with i64) across the two
/// consecutive 32-bit register locations \p VA and \p NextVA, appending both
/// halves to \p RegsToPass. Only reachable on 32-bit AVX512BW targets.
static void Passv64i1ArgInRegs(
    const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
    SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The value should reside in two registers");

  // View the mask as a plain 64-bit integer.
  Arg = DAG.getBitcast(MVT::i64, Arg);

  // Split into low (element 0) and high (element 1) 32-bit halves.
  SDValue Lo, Hi;
  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
                   DAG.getConstant(0, Dl, MVT::i32));
  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
                   DAG.getConstant(1, Dl, MVT::i32));

  // Low half goes to VA's register, high half to NextVA's.
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}
2870 | |
// Lower an IR `ret` into an X86ISD::RET_FLAG (or IRET for interrupt
// handlers): assign return values to registers per RetCC_X86, emit the
// CopyToReg chain, and append implicit register operands.
SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  // RegCall and no_caller_saved_registers functions must not treat the
  // registers used here as callee-saved.
  bool ShouldDisableCalleeSavedRegister =
      CallConv == CallingConv::X86_RegCall ||
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");

  if (CallConv == CallingConv::X86_INTR && !Outs.empty())
    report_fatal_error("X86 interrupts may not return any value");

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);

  // Gather (register, value) pairs first; copies are emitted afterwards so
  // x87 results can be ordered last.
  SmallVector<std::pair<Register, SDValue>, 4> RetVals;
  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
       ++I, ++OutsIndex) {
    CCValAssign &VA = RVLocs[I];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (ShouldDisableCalleeSavedRegister)
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());

    SDValue ValToCopy = OutVals[OutsIndex];
    EVT ValVT = ValToCopy.getValueType();

    // Promote the value to its location type. i1 mask vectors under AExt
    // need the dedicated mask lowering.
    if (VA.getLocInfo() == CCValAssign::SExt)
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    }
    else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);

    assert(VA.getLocInfo() != CCValAssign::FPExt &&
           "Unexpected FP-extend for return value.");

    // FP returns assigned to SSE registers when SSE is disabled: report the
    // error and redirect the value to the x87 stack (FP0) to keep going.
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      VA.convertToReg(X86::FP0);
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               ValVT == MVT::f64) {
      // f64 needs SSE2; SSE1 alone cannot hold it.
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      VA.convertToReg(X86::FP0);
    }

    // Returns on the x87 stack: values kept in SSE registers must first be
    // extended to f80, the x87 stack element type.
    if (VA.getLocReg() == X86::FP0 ||
        VA.getLocReg() == X86::FP1) {
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
      // x87 values are passed as direct RET operands, not CopyToReg.
      continue;
    }

    // 64-bit ABI: MMX values returned in XMM0/XMM1 travel as the low lane of
    // a v2i64 (viewed as v4f32 when SSE2 is unavailable).
    if (Subtarget.is64Bit()) {
      if (ValVT == MVT::x86mmx) {
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);
          if (!Subtarget.hasSSE2())
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
        }
      }
    }

    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");

      // Consumes RVLocs[I+1] as the second half's location (note the ++I).
      Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
                         Subtarget);

      // Also un-reserve the second half's register.
      if (ShouldDisableCalleeSavedRegister)
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
    } else {
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
    }
  }

  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  // Operand #1: bytes the callee pops (for stdcall-style conventions).
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
                                         MVT::i32));

  // Emit the actual register copies, glueing them together; x87 values are
  // appended as direct operands instead.
  for (auto &RetVal : RetVals) {
    if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
      RetOps.push_back(RetVal.second);
      continue;
    }

    Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
  }

  // Functions returning a struct via sret must also return the hidden
  // pointer in RAX/EAX, per the ABI.
  if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
    // Reload the saved sret pointer (stored into SRetReg at argument
    // lowering time).
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
                                     getPointerTy(MF.getDataLayout()));

    Register RetValReg
        = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
          X86::RAX : X86::EAX;
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
    Flag = Chain.getValue(1);

    // RAX/EAX now carries the sret pointer; record it as a RET operand.
    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));

    if (ShouldDisableCalleeSavedRegister)
      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
  }

  // Swift-style conventions may return callee-saved registers via copies;
  // list them so the RET keeps them live.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (X86::GR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;

  // Glue the last copy to the RET, if any copies were emitted.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  X86ISD::NodeType opcode = X86ISD::RET_FLAG;
  if (CallConv == CallingConv::X86_INTR)
    opcode = X86ISD::IRET;
  return DAG.getNode(opcode, dl, MVT::Other, RetOps);
}
3074 | |
/// Return true if the only user of \p N (which must produce exactly one value)
/// is a return, so the value could be folded into a tail call. On success,
/// \p Chain is updated to the chain entering the copy/return sequence.
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  // Must produce a single value with a single use.
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
    return false;

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() == ISD::CopyToReg) {
    // If the copy has a glue operand, we conservatively assume it isn't safe
    // to perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
    return false;

  bool HasRet = false;
  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
       UI != UE; ++UI) {
    // Every user of the copy must be a return node.
    if (UI->getOpcode() != X86ISD::RET_FLAG)
      return false;
    // A RET_FLAG returning a single value has at most 4 operands
    // (chain, pop-bytes, register, glue); more implies multiple return
    // values, which we reject.
    if (UI->getNumOperands() > 4)
      return false;
    // With exactly 4 operands, the last one must be the glue.
    if (UI->getNumOperands() == 4 &&
        UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = TCChain;
  return true;
}
3111 | |
3112 | EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, |
3113 | ISD::NodeType ExtendKind) const { |
3114 | MVT ReturnMVT = MVT::i32; |
3115 | |
3116 | bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); |
3117 | if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { |
3118 | |
3119 | |
3120 | |
3121 | |
3122 | |
3123 | ReturnMVT = MVT::i8; |
3124 | } |
3125 | |
3126 | EVT MinVT = getRegisterType(Context, ReturnMVT); |
3127 | return VT.bitsLT(MinVT) ? MinVT : VT; |
3128 | } |
3129 | |
3130 | |
3131 | |
3132 | |
3133 | |
3134 | |
3135 | |
3136 | |
3137 | |
3138 | |
/// Rebuild a v64i1 argument that a 32-bit calling convention split across two
/// 32-bit GPRs: \p VA holds the low half, \p NextVA the high half.
///
/// When \p InFlag is null, the halves are read as function live-ins (formal
/// argument lowering). Otherwise the copies are glued CopyFromReg nodes
/// threaded through *InFlag (call-result lowering), and *InFlag is updated
/// to the glue produced by the second copy.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
                                SDValue &Root, SelectionDAG &DAG,
                                const SDLoc &Dl, const X86Subtarget &Subtarget,
                                SDValue *InFlag = nullptr) {
  assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(VA.getValVT() == MVT::v64i1 &&
         "Expecting first location of 64 bit width type");
  assert(NextVA.getValVT() == VA.getValVT() &&
         "The locations should have the same type");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The values should reside in two registers");

  SDValue Lo, Hi;
  SDValue ArgValueLo, ArgValueHi;

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetRegisterClass *RC = &X86::GR32RegClass;

  // Read the two 32-bit halves.
  if (nullptr == InFlag) {
    // Formal argument: register both locations as live-ins and copy from the
    // resulting virtual registers.
    Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
    ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
  } else {
    // Call result: copy straight from the physical registers, threading the
    // glue so both copies stay attached to the call. Order matters: the low
    // half's output glue feeds the high half's copy.
    ArgValueLo =
        DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
    *InFlag = ArgValueLo.getValue(2);
    ArgValueHi =
        DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
    *InFlag = ArgValueHi.getValue(2);
  }

  // Reinterpret each 32-bit half as a 32-lane mask.
  Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
  Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);

  // Concatenate the halves into the full 64-lane mask.
  return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
}
3186 | |
3187 | |
3188 | |
3189 | |
3190 | static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, |
3191 | const EVT &ValLoc, const SDLoc &Dl, |
3192 | SelectionDAG &DAG) { |
3193 | SDValue ValReturned = ValArg; |
3194 | |
3195 | if (ValVT == MVT::v1i1) |
3196 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned); |
3197 | |
3198 | if (ValVT == MVT::v64i1) { |
3199 | |
3200 | assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); |
3201 | |
3202 | } else { |
3203 | MVT maskLen; |
3204 | switch (ValVT.getSimpleVT().SimpleTy) { |
3205 | case MVT::v8i1: |
3206 | maskLen = MVT::i8; |
3207 | break; |
3208 | case MVT::v16i1: |
3209 | maskLen = MVT::i16; |
3210 | break; |
3211 | case MVT::v32i1: |
3212 | maskLen = MVT::i32; |
3213 | break; |
3214 | default: |
3215 | llvm_unreachable("Expecting a vector of i1 types"); |
3216 | } |
3217 | |
3218 | ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned); |
3219 | } |
3220 | return DAG.getBitcast(ValVT, ValReturned); |
3221 | } |
3222 | |
3223 | |
3224 | |
3225 | |
/// Lower the result values of a call into the appropriate copies out of
/// physical registers, appending the produced SDValues to \p InVals.
/// \p RegMask, when non-null, is the preserved-register mask of a
/// no_caller_saved_registers call and is updated to mark the result
/// registers as clobbered.
SDValue X86TargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    uint32_t *RegMask) const {

  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);

  // Copy all of the result registers out of their specified physregs.
  for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
       ++I, ++InsIndex) {
    CCValAssign &VA = RVLocs[I];
    EVT CopyVT = VA.getLocVT();

    // Clear the result register and all its aliases/sub-registers from the
    // preserved mask, since the callee writes it.
    if (RegMask) {
      for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, true);
           SubRegs.isValid(); ++SubRegs)
        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
    }

    // Report an error if an FP value was supposed to come back in an XMM
    // register with SSE disabled; recover by rewriting the location to the
    // x87 stack (FP0/FP1) so lowering can continue.
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1);
      else
        VA.convertToReg(X86::FP0);
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               CopyVT == MVT::f64) {
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1);
      else
        VA.convertToReg(X86::FP0);
    }

    // If the value lives on the x87 stack but we want it in an SSE register,
    // copy it out as f80 and round afterwards.
    bool RoundAfterCopy = false;
    if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
      if (!Subtarget.hasX87())
        report_fatal_error("X87 register return with X87 disabled");
      CopyVT = MVT::f80;
      RoundAfterCopy = (CopyVT != VA.getLocVT());
    }

    SDValue Val;
    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");
      // Note: consumes RVLocs[I+1] as the high half of the split pair.
      Val =
          getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
    } else {
      // Ordinary case: glued copy out of the physreg; rethread chain + glue.
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
                  .getValue(1);
      Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
    }

    if (RoundAfterCopy)
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1, dl));

    if (VA.isExtInLoc()) {
      if (VA.getValVT().isVector() &&
          VA.getValVT().getScalarType() == MVT::i1 &&
          ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
           (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
        // i1 mask vectors promoted to a scalar integer: rebuild the mask.
        Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
      } else
        Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
    }

    if (VA.getLocInfo() == CCValAssign::BCvt)
      Val = DAG.getBitcast(VA.getValVT(), Val);

    InVals.push_back(Val);
  }

  return Chain;
}
3319 | |
3320 | |
3321 | |
3322 | |
3323 | |
3324 | |
3325 | |
3326 | |
3327 | |
3328 | |
3329 | |
3330 | |
3331 | |
/// Classifies how an sret (struct-return) pointer is passed, if at all.
enum StructReturnType {
  // No sret argument present.
  NotStructReturn,
  // The sret pointer is passed in a register.
  RegStructReturn,
  // The sret pointer is passed on the stack.
  StackStructReturn
};
3337 | static StructReturnType |
3338 | callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) { |
3339 | if (Outs.empty()) |
3340 | return NotStructReturn; |
3341 | |
3342 | const ISD::ArgFlagsTy &Flags = Outs[0].Flags; |
3343 | if (!Flags.isSRet()) |
3344 | return NotStructReturn; |
3345 | if (Flags.isInReg() || IsMCU) |
3346 | return RegStructReturn; |
3347 | return StackStructReturn; |
3348 | } |
3349 | |
3350 | |
3351 | static StructReturnType |
3352 | argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) { |
3353 | if (Ins.empty()) |
3354 | return NotStructReturn; |
3355 | |
3356 | const ISD::ArgFlagsTy &Flags = Ins[0].Flags; |
3357 | if (!Flags.isSRet()) |
3358 | return NotStructReturn; |
3359 | if (Flags.isInReg() || IsMCU) |
3360 | return RegStructReturn; |
3361 | return StackStructReturn; |
3362 | } |
3363 | |
3364 | |
3365 | |
3366 | |
/// Emit a memcpy of a by-value argument from \p Src to \p Dst using the size
/// and alignment recorded in \p Flags.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);

  // AlwaysInline=true so the copy can never be turned into a call in the
  // middle of an argument-setup sequence.
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile*/ false, /*AlwaysInline=*/true,
      /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
}
3377 | |
3378 | |
3379 | static bool canGuaranteeTCO(CallingConv::ID CC) { |
3380 | return (CC == CallingConv::Fast || CC == CallingConv::GHC || |
3381 | CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || |
3382 | CC == CallingConv::HHVM || CC == CallingConv::Tail || |
3383 | CC == CallingConv::SwiftTail); |
3384 | } |
3385 | |
3386 | |
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  // C calling conventions:
  case CallingConv::C:
  case CallingConv::Win64:
  case CallingConv::X86_64_SysV:
  // Callee pop conventions:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_StdCall:
  case CallingConv::X86_VectorCall:
  case CallingConv::X86_FastCall:
  // Swift:
  case CallingConv::Swift:
    return true;
  default:
    // Anything that can guarantee TCO may also be tail-called.
    return canGuaranteeTCO(CC);
  }
}
3405 | |
3406 | |
3407 | |
3408 | static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { |
3409 | return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || |
3410 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
3411 | } |
3412 | |
3413 | bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
3414 | if (!CI->isTailCall()) |
3415 | return false; |
3416 | |
3417 | CallingConv::ID CalleeCC = CI->getCallingConv(); |
3418 | if (!mayTailCallThisCC(CalleeCC)) |
3419 | return false; |
3420 | |
3421 | return true; |
3422 | } |
3423 | |
/// Lower an incoming argument that was assigned a stack location: create (or
/// find) the fixed frame object holding it and load the value.
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    const SDLoc &dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    MachineFrameInfo &MFI, unsigned i) const {
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;
  // Under guaranteed TCO the incoming-argument area may be overwritten by
  // outgoing tail-call arguments, so objects must stay mutable.
  bool AlwaysUseMutable = shouldGuaranteeTCO(
      CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
  EVT ValVT;
  MVT PtrVT = getPointerTy(DAG.getDataLayout());

  // i1 mask vectors extended into a wider location must be loaded at the
  // location's width and narrowed afterwards ("extended in memory").
  bool ExtendedInMem =
      VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
      VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();

  // Indirect arguments hold a pointer, and extended-in-mem values are read at
  // the location type; otherwise load the value type directly.
  if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
    ValVT = VA.getLocVT();
  else
    ValVT = VA.getValVT();

  // byval: the caller already placed the aggregate on the stack; just hand
  // back the address of the fixed object covering it.
  if (Flags.isByVal()) {
    unsigned Bytes = Flags.getByValSize();
    if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.

    // Note: byval objects are created mutable (isImmutable is false for
    // byval above) and marked aliased.
    int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
                                   true);
    return DAG.getFrameIndex(FI, PtrVT);
  }

  EVT ArgVT = Ins[i].ArgVT;

  // A vector argument that was scalarized and widened into a non-vector
  // location cannot be copy-elided below (the in-memory layout differs).
  bool ScalarizedAndExtendedVector =
      ArgVT.isVector() && !VA.getLocVT().isVector() &&
      VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();

  // Copy elision: load straight from the caller-written stack slot instead of
  // making a local copy, when the value's representation matches.
  if (Flags.isCopyElisionCandidate() &&
      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
      !ScalarizedAndExtendedVector) {
    SDValue PartAddr;
    if (Ins[i].PartOffset == 0) {
      // First (or only) part: create a fixed object for the whole argument
      // and load this value from its start.
      int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
                                     false);
      PartAddr = DAG.getFrameIndex(FI, PtrVT);
      return DAG.getLoad(
          ValVT, dl, Chain, PartAddr,
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
    } else {
      // A later part of a multi-part argument: scan the fixed objects for one
      // whose byte range covers this part, then load at PartOffset within it.
      int64_t PartBegin = VA.getLocMemOffset();
      int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
      int FI = MFI.getObjectIndexBegin();
      for (; MFI.isFixedObjectIndex(FI); ++FI) {
        int64_t ObjBegin = MFI.getObjectOffset(FI);
        int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
        if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
          break;
      }
      if (MFI.isFixedObjectIndex(FI)) {
        SDValue Addr =
            DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
                        DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
        return DAG.getLoad(
            ValVT, dl, Chain, Addr,
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
                                              Ins[i].PartOffset));
      }
      // No covering object found: fall through to the generic path below.
    }
  }

  // Generic path: dedicated fixed object for this value.
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), isImmutable);

  // Record how the caller extended the value so later loads can use it.
  if (VA.getLocInfo() == CCValAssign::ZExt) {
    MFI.setObjectZExt(FI, true);
  } else if (VA.getLocInfo() == CCValAssign::SExt) {
    MFI.setObjectSExt(FI, true);
  }

  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val = DAG.getLoad(
      ValVT, dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
  // Extended-in-mem i1 masks: re-wrap/narrow back to the value type.
  return ExtendedInMem
             ? (VA.getValVT().isVector()
                    ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
                    : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
             : Val;
}
3540 | |
3541 | |
3542 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, |
3543 | const X86Subtarget &Subtarget) { |
3544 | assert(Subtarget.is64Bit()); |
3545 | |
3546 | if (Subtarget.isCallingConvWin64(CallConv)) { |
3547 | static const MCPhysReg GPR64ArgRegsWin64[] = { |
3548 | X86::RCX, X86::RDX, X86::R8, X86::R9 |
3549 | }; |
3550 | return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); |
3551 | } |
3552 | |
3553 | static const MCPhysReg GPR64ArgRegs64Bit[] = { |
3554 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 |
3555 | }; |
3556 | return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); |
3557 | } |
3558 | |
3559 | |
3560 | static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, |
3561 | CallingConv::ID CallConv, |
3562 | const X86Subtarget &Subtarget) { |
3563 | assert(Subtarget.is64Bit()); |
3564 | if (Subtarget.isCallingConvWin64(CallConv)) { |
3565 | |
3566 | |
3567 | |
3568 | |
3569 | return None; |
3570 | } |
3571 | |
3572 | bool isSoftFloat = Subtarget.useSoftFloat(); |
3573 | if (isSoftFloat || !Subtarget.hasSSE1()) |
3574 | |
3575 | |
3576 | return None; |
3577 | |
3578 | static const MCPhysReg XMMArgRegs64Bit[] = { |
3579 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
3580 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
3581 | }; |
3582 | return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); |
3583 | } |
3584 | |
#ifndef NDEBUG
/// Debug-build helper: verify the assigned argument locations are sorted by
/// the value number they correspond to; later lowering walks ArgLocs and Ins
/// in lockstep and relies on this ordering.
static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
  return llvm::is_sorted(
      ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
        return A.getValNo() < B.getValNo();
      });
}
#endif
3593 | |
namespace {
/// Helper that lowers a vararg function's variable-argument machinery:
/// creates the va_arg frame objects, spills unused argument registers to the
/// register save area, and forwards registers needed by musttail calls.
class VarArgsLoweringHelper {
public:
  VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
                        SelectionDAG &DAG, const X86Subtarget &Subtarget,
                        CallingConv::ID CallConv, CCState &CCInfo)
      : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
        TheMachineFunction(DAG.getMachineFunction()),
        TheFunction(TheMachineFunction.getFunction()),
        FrameInfo(TheMachineFunction.getFrameInfo()),
        FrameLowering(*Subtarget.getFrameLowering()),
        TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
        CCInfo(CCInfo) {}

  // Lower variable arguments parameters (entry point).
  void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);

private:
  // Create the va_arg area and spill unused argument registers into it.
  void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);

  // Forward argument registers so a musttail call can reuse them.
  void forwardMustTailParameters(SDValue &Chain);

  bool is64Bit() const { return Subtarget.is64Bit(); }
  bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }

  X86MachineFunctionInfo *FuncInfo;
  const SDLoc &DL;
  SelectionDAG &DAG;
  const X86Subtarget &Subtarget;
  MachineFunction &TheMachineFunction;
  const Function &TheFunction;
  MachineFrameInfo &FrameInfo;
  const TargetFrameLowering &FrameLowering;
  const TargetLowering &TargLowering;
  CallingConv::ID CallConv;
  CCState &CCInfo;
};
} // end anonymous namespace
3633 | |
/// Create the frame objects backing va_arg and, on 64-bit targets, spill any
/// argument registers not consumed by fixed arguments into the register save
/// area so va_arg can find them.
void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
    SDValue &Chain, unsigned StackSize) {
  // The stack va_arg area begins just past the last fixed stack argument.
  // Skipped for 32-bit fastcall/thiscall here.
  if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
                    CallConv != CallingConv::X86_ThisCall)) {
    FuncInfo->setVarArgsFrameIndex(
        FrameInfo.CreateFixedObject(1, StackSize, true));
  }

  // 64-bit conventions also pass varargs in registers; those registers are
  // spilled to a register save area at function entry.
  if (is64Bit()) {
    ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
    ArrayRef<MCPhysReg> ArgXMMs =
        get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
    // How many argument registers the fixed parameters already consumed.
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);

    assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
           "SSE register cannot be used when SSE is disabled!");

    if (isWin64()) {
      // Win64: use the caller-allocated home (shadow) space as the save
      // area; point at the first home slot not covered by fixed arguments.
      int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
      FuncInfo->setRegSaveFrameIndex(
          FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
      // If register arguments remain, va_arg starts at their home slot
      // rather than past the fixed stack arguments.
      if (NumIntRegs < 4)
        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
    } else {
      // SysV: dedicated save area of 8 bytes per GPR plus 16 bytes per XMM;
      // the GP/FP offsets record how much of it the fixed args consumed.
      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
      FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
      FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
          ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
    }

    SmallVector<SDValue, 6> LiveGPRs;
    SmallVector<SDValue, 8> LiveXMMRegs;

    SDValue ALVal;

    // Gather the unconsumed integer argument registers as live-in copies.
    for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
      Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
      LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
    }
    const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
    if (!AvailableXmms.empty()) {
      // AL is read as well: per the SysV ABI it carries the number of vector
      // registers the caller actually used.
      Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
      ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
      for (MCPhysReg Reg : AvailableXmms) {
        // XMM registers are only marked live-in — no CopyFromReg — because
        // the VASTART_SAVE_XMM_REGS node below takes them as raw register
        // operands.
        TheMachineFunction.getRegInfo().addLiveIn(Reg);
        LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
      }
    }

    // Store the integer registers into successive 8-byte slots of the save
    // area, starting at the GP offset.
    SmallVector<SDValue, 8> MemOps;
    SDValue RSFIN =
        DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
                          TargLowering.getPointerTy(DAG.getDataLayout()));
    unsigned Offset = FuncInfo->getVarArgsGPOffset();
    for (SDValue Val : LiveGPRs) {
      SDValue FIN = DAG.getNode(ISD::ADD, DL,
                                TargLowering.getPointerTy(DAG.getDataLayout()),
                                RSFIN, DAG.getIntPtrConstant(Offset, DL));
      SDValue Store =
          DAG.getStore(Val.getValue(1), DL, Val, FIN,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(),
                           FuncInfo->getRegSaveFrameIndex(), Offset));
      MemOps.push_back(Store);
      Offset += 8;
    }

    // XMM registers are saved by one pseudo node; it receives AL so the
    // expansion can skip stores when no vector args were passed.
    if (!LiveXMMRegs.empty()) {
      SmallVector<SDValue, 12> SaveXMMOps;
      SaveXMMOps.push_back(Chain);
      SaveXMMOps.push_back(ALVal);
      SaveXMMOps.push_back(
          DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
      SaveXMMOps.push_back(
          DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
      llvm::append_range(SaveXMMOps, LiveXMMRegs);
      MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
                                   MVT::Other, SaveXMMOps));
    }

    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
  }
}
3739 | |
/// Copy all still-unallocated argument registers into fresh virtual registers
/// so a later musttail call in this vararg function can forward them intact.
void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
  // Choose the widest vector type that can legally carry forwarded vector
  // register contents on this subtarget.
  MVT VecVT = MVT::Other;
  // 512-bit forwarding is only enabled for conventions listed here; on other
  // 32-bit conventions fall through to narrower vectors.
  if (Subtarget.useAVX512Regs() &&
      (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
                     CallConv == CallingConv::Intel_OCL_BI)))
    VecVT = MVT::v16f32;
  else if (Subtarget.hasAVX())
    VecVT = MVT::v8f32;
  else if (Subtarget.hasSSE2())
    VecVT = MVT::v4f32;

  // The register classes that may carry forwarded arguments: integer, and
  // optionally the vector type picked above.
  SmallVector<MVT, 2> RegParmTypes;
  MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
  RegParmTypes.push_back(IntVT);
  if (VecVT != MVT::Other)
    RegParmTypes.push_back(VecVT);

  // Compute the set of forwarded registers: those not claimed by the fixed
  // arguments.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);

  // On non-Win64 64-bit targets also forward AL (the SysV vector-register
  // count for vararg calls) if nothing else claimed it.
  if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
    Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
    Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
  }

  // Copy each forwarded register out of its entry live-in and park it in a
  // new virtual register; FR.VReg is rewritten so the musttail call reads
  // the parked copy.
  for (ForwardedRegister &FR : Forwards) {
    SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
    FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
        TargLowering.getRegClassFor(FR.VT));
    Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
  }
}
3780 | |
/// Entry point: lower everything a vararg function needs for va_start and for
/// musttail forwarding.
void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
                                                   unsigned StackSize) {
  // Seed the frame indexes with a recognizable sentinel (0xAAAAAAA) so an
  // accidental use before they are properly assigned is easy to spot; the
  // helpers below overwrite them when actually needed.
  FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
  FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);

  // Only materialize the va_arg area if va_start is actually used.
  if (FrameInfo.hasVAStart())
    createVarArgAreaAndStoreRegisters(Chain, StackSize);

  // Forward argument registers if a musttail call appears in this function.
  if (FrameInfo.hasMustTailInVarArgFunc())
    forwardMustTailParameters(Chain);
}
3794 | |
/// Lower the incoming (formal) arguments of a function, producing one SDValue
/// per argument in \p InVals and recording calling-convention bookkeeping
/// (bytes to pop, vararg areas, sret register, etc.) in the function info.
SDValue X86TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  // Cygwin/MinGW special case: force a frame pointer in main().
  const Function &F = MF.getFunction();
  if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
      F.getName() == "main")
    FuncInfo->setForceFramePointer(true);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool Is64Bit = Subtarget.is64Bit();
  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);

  assert(
      !(IsVarArg && canGuaranteeTCO(CallConv)) &&
      "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Win64: reserve the 32-byte shadow (home) space before assigning stack
  // locations.
  if (IsWin64)
    CCInfo.AllocateStack(32, Align(8));

  CCInfo.AnalyzeArguments(Ins, CC_X86);

  // vectorcall assigns some arguments in a second pass after the first one
  // has distributed the vector registers.
  if (CallingConv::X86_VectorCall == CallConv) {
    CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
  }

  // The loop below walks ArgLocs and Ins in lockstep, which requires the
  // locations to be sorted by value number.
  assert(isSortedByValueNo(ArgLocs) &&
         "Argument Location list must be sorted before lowering");

  SDValue ArgValue;
  for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
       ++I, ++InsIndex) {
    assert(InsIndex < Ins.size() && "Invalid Ins index");
    CCValAssign &VA = ArgLocs[I];

    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      if (VA.needsCustom()) {
        assert(
            VA.getValVT() == MVT::v64i1 &&
            "Currently the only custom case is when we split v64i1 to 2 regs");

        // Consumes ArgLocs[I+1] as the high half of the split pair.
        ArgValue =
            getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
      } else {
        // Map the location type to the register class we read it from.
        const TargetRegisterClass *RC;
        if (RegVT == MVT::i8)
          RC = &X86::GR8RegClass;
        else if (RegVT == MVT::i16)
          RC = &X86::GR16RegClass;
        else if (RegVT == MVT::i32)
          RC = &X86::GR32RegClass;
        else if (Is64Bit && RegVT == MVT::i64)
          RC = &X86::GR64RegClass;
        else if (RegVT == MVT::f16)
          RC = &X86::FR16XRegClass;
        else if (RegVT == MVT::f32)
          RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
        else if (RegVT == MVT::f64)
          RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
        else if (RegVT == MVT::f80)
          RC = &X86::RFP80RegClass;
        else if (RegVT == MVT::f128)
          RC = &X86::VR128RegClass;
        else if (RegVT.is512BitVector())
          RC = &X86::VR512RegClass;
        else if (RegVT.is256BitVector())
          RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
        else if (RegVT.is128BitVector())
          RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
        else if (RegVT == MVT::x86mmx)
          RC = &X86::VR64RegClass;
        else if (RegVT == MVT::v1i1)
          RC = &X86::VK1RegClass;
        else if (RegVT == MVT::v8i1)
          RC = &X86::VK8RegClass;
        else if (RegVT == MVT::v16i1)
          RC = &X86::VK16RegClass;
        else if (RegVT == MVT::v32i1)
          RC = &X86::VK32RegClass;
        else if (RegVT == MVT::v64i1)
          RC = &X86::VK64RegClass;
        else
          llvm_unreachable("Unknown argument type!");

        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
      }

      // Record how the caller extended the value (assertions let later
      // optimizations rely on it), or undo a bitconvert.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::BCvt)
        ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);

      if (VA.isExtInLoc()) {
        // Handle MMX values passed in XMM regs.
        if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
          ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
        else if (VA.getValVT().isVector() &&
                 VA.getValVT().getScalarType() == MVT::i1 &&
                 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
                  (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
          // i1 mask vector promoted to a scalar integer: rebuild the mask.
          ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
        } else
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
      }
    } else {
      // Stack-passed argument.
      assert(VA.isMemLoc());
      ArgValue =
          LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
    }

    // Indirect (non-byval) arguments hold a pointer; load through it.
    if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
      ArgValue =
          DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());

    InVals.push_back(ArgValue);
  }

  for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
    // swiftasync context: on 64-bit it is tracked in the function info; on
    // 32-bit it is spilled to a dedicated 4-byte stack slot.
    if (Ins[I].Flags.isSwiftAsync()) {
      auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
      if (Subtarget.is64Bit())
        X86FI->setHasSwiftAsyncContext(true);
      else {
        int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
        X86FI->setSwiftAsyncContextFrameIdx(FI);
        SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
                                  DAG.getFrameIndex(FI, MVT::i32),
                                  MachinePointerInfo::getFixedStack(MF, FI));
        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
      }
    }

    // Swift conventions do not use the sret-return-register mechanism below.
    if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
      continue;

    // Stash the incoming sret pointer in a virtual register so LowerReturn
    // can hand it back to the caller; at most one sret argument exists.
    if (Ins[I].Flags.isSRet()) {
      assert(!FuncInfo->getSRetReturnReg() &&
             "SRet return has already been set");
      MVT PtrTy = getPointerTy(DAG.getDataLayout());
      Register Reg =
          MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
      FuncInfo->setSRetReturnReg(Reg);
      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
      break;
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align stack specially for guaranteed-TCO functions.
  if (shouldGuaranteeTCO(CallConv,
                         MF.getTarget().Options.GuaranteedTailCallOpt))
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // Lower the vararg machinery (va_arg area, register save area, musttail
  // forwarding) if needed.
  if (IsVarArg)
    VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
        .lowerVarArgsParameters(Chain, StackSize);

  // Decide how many bytes the callee pops on return.
  if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // Callee-pop conventions pop their whole argument area.
    FuncInfo->setBytesToPopOnReturn(StackSize);
  } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
    // X86 interrupt handlers with an error code pop it (pointer-sized).
    FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
  } else {
    FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
    // 32-bit stack-sret (outside MSVCRT and TCO conventions): callee pops
    // the hidden sret pointer (4 bytes).
    if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
        !Subtarget.getTargetTriple().isOSMSVCRT() &&
        argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
      FuncInfo->setBytesToPopOnReturn(4);
  }

  if (!Is64Bit) {
    // RegSaveFrameIndex is an X86-64-only concept; reset to the sentinel.
    FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
  }

  FuncInfo->setArgumentStackSize(StackSize);

  // CoreCLR EH personality needs a PSPSym slot in the frame.
  if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
    EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
    if (Personality == EHPersonality::CoreCLR) {
      assert(Is64Bit);
      int PSPSymFI = MFI.CreateStackObject(8, Align(8), false);
      EHInfo->PSPSymFrameIdx = PSPSymFI;
    }
  }

  // regcall / no_caller_saved_registers: argument registers must not be
  // treated as callee-saved.
  if (CallConv == CallingConv::X86_RegCall ||
      F.hasFnAttribute("no_caller_saved_registers")) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    for (std::pair<Register, Register> Pair : MRI.liveins())
      MRI.disableCalleeSavedRegister(Pair.first);
  }

  return Chain;
}
4034 | |
4035 | SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, |
4036 | SDValue Arg, const SDLoc &dl, |
4037 | SelectionDAG &DAG, |
4038 | const CCValAssign &VA, |
4039 | ISD::ArgFlagsTy Flags, |
4040 | bool isByVal) const { |
4041 | unsigned LocMemOffset = VA.getLocMemOffset(); |
4042 | SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
4043 | PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
4044 | StackPtr, PtrOff); |
4045 | if (isByVal) |
4046 | return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); |
4047 | |
4048 | return DAG.getStore( |
4049 | Chain, dl, Arg, PtrOff, |
4050 | MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); |
4051 | } |
4052 | |
4053 | |
4054 | |
/// Load the return address from the stack in preparation for a tail call
/// whose frame size differs from the caller's (FPDiff != 0): the loaded
/// value is later re-stored at the relocated slot by
/// EmitTailCallStoreRetAddr.
///
/// \param OutRetAddr [out] On return, the loaded return-address value.
/// \returns The chain result (value #1) of the load node, so callers can
///          sequence subsequent stack writes after this load.
/// NOTE(review): IsTailCall and FPDiff are unused here — presumably kept for
/// signature symmetry with the store side; confirm before removing.
SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
    SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
    bool Is64Bit, int FPDiff, const SDLoc &dl) const {
  // Adjust the Return address stack slot.
  EVT VT = getPointerTy(DAG.getDataLayout());
  OutRetAddr = getReturnAddressFrameIndex(DAG);

  // Load the "old" Return address.
  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
  // Return the output chain of the load, not the loaded value itself.
  return SDValue(OutRetAddr.getNode(), 1);
}
4066 | |
4067 | |
4068 | |
4069 | static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, |
4070 | SDValue Chain, SDValue RetAddrFrIdx, |
4071 | EVT PtrVT, unsigned SlotSize, |
4072 | int FPDiff, const SDLoc &dl) { |
4073 | |
4074 | if (!FPDiff) return Chain; |
4075 | |
4076 | int NewReturnAddrFI = |
4077 | MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, |
4078 | false); |
4079 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); |
4080 | Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, |
4081 | MachinePointerInfo::getFixedStack( |
4082 | DAG.getMachineFunction(), NewReturnAddrFI)); |
4083 | return Chain; |
4084 | } |
4085 | |
4086 | |
4087 | |
4088 | static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, |
4089 | SDValue V2) { |
4090 | unsigned NumElems = VT.getVectorNumElements(); |
4091 | SmallVector<int, 8> Mask; |
4092 | Mask.push_back(NumElems); |
4093 | for (unsigned i = 1; i != NumElems; ++i) |
4094 | Mask.push_back(i); |
4095 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
4096 | } |
4097 | |
/// Lower an outgoing call: classify the arguments per the calling
/// convention, emit register copies / stack stores for them, build the
/// X86ISD::CALL (or TC_RETURN / NT_CALL / CALL_RVMARKER) node, and copy the
/// results back out of physical registers via LowerCallResult.
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  // Note: isTailCall aliases CLI.IsTailCall, so clearing it below reports the
  // decision back to the caller of LowerCall.
  bool &isTailCall = CLI.IsTailCall;
  bool isVarArg = CLI.IsVarArg;
  const auto *CB = CLI.CB;

  MachineFunction &MF = DAG.getMachineFunction();
  bool Is64Bit = Subtarget.is64Bit();
  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
  StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
  bool IsSibcall = false;
  // Tail calls are *guaranteed* under -tailcallopt or the tailcc/swifttailcc
  // conventions; this changes stack-size rounding and sibcall eligibility.
  bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
      CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
  X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
  bool HasNCSR = (CB && isa<CallInst>(CB) &&
                  CB->hasFnAttr("no_caller_saved_registers"));
  bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
  bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
  const Module *M = MF.getMMI().getModule();
  Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");

  MachineFunction::CallSiteInfo CSInfo;
  if (CallConv == CallingConv::X86_INTR)
    report_fatal_error("X86 interrupts may not be called directly");

  bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
  if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
    // On GOT-style PIC, only tail-call targets known to bind locally (local
    // linkage or non-default visibility) are allowed; otherwise force a
    // normal call. (A target that binds through the GOT/PLT would need the
    // GOT base register live at the tail jump.)
    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    if (!G || (!G->getGlobal()->hasLocalLinkage() &&
               G->getGlobal()->hasDefaultVisibility()))
      isTailCall = false;
  }

  if (isTailCall && !IsMustTail) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(
        Callee, CallConv, SR == StackStructReturn, isVarArg, CLI.RetTy, Outs,
        OutVals, Ins, DAG);

    // Sibcalls are automatically detected tailcalls which do not require
    // ABI changes.
    if (!IsGuaranteeTCO && isTailCall)
      IsSibcall = true;

    if (isTailCall)
      ++NumTailCalls;
  }

  if (IsMustTail && !isTailCall)
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
         "Var args not supported with calling convention fastcc, ghc or hipe");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Allocate shadow area for Win64.
  if (IsWin64)
    CCInfo.AllocateStack(32, Align(8));

  CCInfo.AnalyzeArguments(Outs, CC_X86);

  // In vectorcall calling convention a second pass is required for the HVA
  // types.
  if (CallingConv::X86_VectorCall == CallConv) {
    CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
  if (IsSibcall)
    // This is a sibcall. The memory operands are available in caller's
    // own caller's stack.
    NumBytes = 0;
  else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  int FPDiff = 0;
  if (isTailCall &&
      shouldGuaranteeTCO(CallConv,
                         MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();

    FPDiff = NumBytesCallerPushed - NumBytes;

    // Set the delta of movement of the returnaddr stackslot.
    // But only set if delta is greater than previous delta.
    if (FPDiff < X86Info->getTCReturnAddrDelta())
      X86Info->setTCReturnAddrDelta(FPDiff);
  }

  unsigned NumBytesToPush = NumBytes;
  unsigned NumBytesToPop = NumBytes;

  // If we have an inalloca argument, all stack space has already been
  // allocated by the caller, so we don't push anything here; similarly for
  // preallocated arguments, which record their offsets instead.
  if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
    NumBytesToPush = 0;
    if (!ArgLocs.back().isMemLoc())
      report_fatal_error("cannot use inalloca attribute on a register "
                         "parameter");
    if (ArgLocs.back().getLocMemOffset() != 0)
      report_fatal_error("any parameter with the inalloca attribute must be "
                         "the only memory argument");
  } else if (CLI.IsPreallocated) {
    assert(ArgLocs.back().isMemLoc() &&
           "cannot use preallocated attribute on a register "
           "parameter");
    // Record the offsets of the preallocated arguments for this call site so
    // the PREALLOCATED_ARG lowering can find them later.
    SmallVector<size_t, 4> PreallocatedOffsets;
    for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
      if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
        PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
      }
    }
    auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
    size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
    MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
    MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
    NumBytesToPush = 0;
  }

  if (!IsSibcall && !IsMustTail)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
                                 NumBytes - NumBytesToPush, dl);

  SDValue RetAddrFrIdx;
  // Load return address for tail calls whose frame size differs from the
  // caller's; it is re-stored at the relocated slot further below.
  if (isTailCall && FPDiff)
    Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
                                    Is64Bit, FPDiff, dl);

  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;

  // The next loop assumes that the locations are in the same order of the
  // input arguments.
  assert(isSortedByValueNo(ArgLocs) &&
         "Argument Location list must be sorted before lowering");

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization arguments are handled later.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
       ++I, ++OutIndex) {
    assert(OutIndex < Outs.size() && "Invalid Out index");
    // Skip inalloca/preallocated arguments, they have already been written.
    ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
    if (Flags.isInAlloca() || Flags.isPreallocated())
      continue;

    CCValAssign &VA = ArgLocs[I];
    EVT RegVT = VA.getLocVT();
    SDValue Arg = OutVals[OutIndex];
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::AExt:
      if (Arg.getValueType().isVector() &&
          Arg.getValueType().getVectorElementType() == MVT::i1)
        Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
      else if (RegVT.is128BitVector()) {
        // Special case: passing MMX values in XMM registers.
        Arg = DAG.getBitcast(MVT::i64, Arg);
        Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
        Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
      } else
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getBitcast(RegVT, Arg);
      break;
    case CCValAssign::Indirect: {
      if (isByVal) {
        // Memcpy the argument to a temporary stack slot to prevent
        // the caller from seeing any modifications the callee may make
        // as guaranteed by the `byval` attribute.
        int FrameIdx = MF.getFrameInfo().CreateStackObject(
            Flags.getByValSize(),
            std::max(Align(16), Flags.getNonZeroByValAlign()), false);
        SDValue StackSlot =
            DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
        Chain =
            CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
        // From now on treat this as a regular pointer.
        Arg = StackSlot;
        isByVal = false;
      } else {
        // Store the argument in a spill slot and pass its address.
        SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
        int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
        Chain = DAG.getStore(
            Chain, dl, Arg, SpillSlot,
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
        Arg = SpillSlot;
      }
      break;
    }
    }

    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");
      // Split v64i1 value into two registers; consumes the next ArgLoc too.
      Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      const TargetOptions &Options = DAG.getTarget().Options;
      if (Options.EmitCallSiteInfo)
        CSInfo.emplace_back(VA.getLocReg(), I);
      if (isVarArg && IsWin64) {
        // Win64 ABI requires argument XMM reg to be copied to the
        // corresponding shadow GPR reg.
        Register ShadowReg;
        switch (VA.getLocReg()) {
        case X86::XMM0: ShadowReg = X86::RCX; break;
        case X86::XMM1: ShadowReg = X86::RDX; break;
        case X86::XMM2: ShadowReg = X86::R8; break;
        case X86::XMM3: ShadowReg = X86::R9; break;
        }
        if (ShadowReg)
          RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
      }
    } else if (!IsSibcall && (!isTailCall || isByVal)) {
      assert(VA.isMemLoc());
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
                                      getPointerTy(DAG.getDataLayout()));
      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags, isByVal));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  if (Subtarget.isPICStyleGOT()) {
    // ELF / PIC requires GOT in the EBX register before function calls via PLT
    // GOT pointer.
    if (!isTailCall) {
      // An indirect RegCall call may use up all the general registers, so do
      // not pin EBX for the GOT address; let the register allocator handle it.
      if (CallConv != CallingConv::X86_RegCall)
        RegsToPass.push_back(std::make_pair(
            Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                                            getPointerTy(DAG.getDataLayout()))));
    } else {
      // For tail calls on PIC/GOT targets we cannot rely on EBX holding the
      // GOT base at the jump (EBX is callee-saved and gets restored in the
      // epilogue before the jump), so lower the callee address to a form that
      // can be materialized into a scratch register instead.
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G && !G->getGlobal()->hasLocalLinkage() &&
          G->getGlobal()->hasDefaultVisibility())
        Callee = LowerGlobalAddress(Callee, DAG);
      else if (isa<ExternalSymbolSDNode>(Callee))
        Callee = LowerExternalSymbol(Callee, DAG);
    }
  }

  if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
    // From the AMD64 ABI: for vararg calls, %al is used as a hidden argument
    // giving an upper bound (0-8) on the number of SSE (XMM) registers used.
    static const MCPhysReg XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
    assert((Subtarget.hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
    RegsToPass.push_back(std::make_pair(Register(X86::AL),
                                        DAG.getConstant(NumXMMRegs, dl,
                                                        MVT::i8)));
  }

  if (isVarArg && IsMustTail) {
    // Forward the register parameters that were saved for a musttail call in
    // the caller's prologue.
    const auto &Forwards = X86Info->getForwardedMustTailRegParms();
    for (const auto &F : Forwards) {
      SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
      RegsToPass.push_back(std::make_pair(F.PReg, Val));
    }
  }

  // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
  // don't need this because the eligibility check rejects calls that require
  // shuffling arguments passed in memory.
  if (!IsSibcall && isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots.
    SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);

    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    int FI = 0;
    for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
         ++I, ++OutsIndex) {
      CCValAssign &VA = ArgLocs[I];

      if (VA.isRegLoc()) {
        if (VA.needsCustom()) {
          assert((CallConv == CallingConv::X86_RegCall) &&
                 "Expecting custom case only in regcall calling convention");
          // This argument was passed in two register locations; skip the
          // second location.
          ++I;
        }

        continue;
      }

      assert(VA.isMemLoc());
      SDValue Arg = OutVals[OutsIndex];
      ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
      // Skip inalloca/preallocated arguments. They don't require any work.
      if (Flags.isInAlloca() || Flags.isPreallocated())
        continue;
      // Create frame index for the adjusted slot (offset shifted by FPDiff).
      int32_t Offset = VA.getLocMemOffset()+FPDiff;
      uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
      FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
      FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));

      if (Flags.isByVal()) {
        // Copy relative to framepointer.
        SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
                                        getPointerTy(DAG.getDataLayout()));
        Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                             StackPtr, Source);

        MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
                                                         ArgChain,
                                                         Flags, DAG, dl));
      } else {
        // Store relative to framepointer.
        MemOpChains2.push_back(DAG.getStore(
            ArgChain, dl, Arg, FIN,
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
                                     getPointerTy(DAG.getDataLayout()),
                                     RegInfo->getSlotSize(), FPDiff, dl);
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and glue operands which copy the outgoing args into registers.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
    // In the 64-bit large code model all calls go through a register, since
    // a 32-bit pc-relative offset may not reach; leave Callee as-is so the
    // call is emitted as an indirect call.
  } else if (Callee->getOpcode() == ISD::GlobalAddress ||
             Callee->getOpcode() == ISD::ExternalSymbol) {
    // Lower direct calls to global addresses and external symbols. The third
    // argument (ForCall) lets the lowering look through extra addressing
    // modes to produce a direct call target.
    Callee = LowerGlobalOrExternal(Callee, DAG, true);
  } else if (Subtarget.isTarget64BitILP32() &&
             Callee->getValueType(0) == MVT::i32) {
    // Zero-extend the 32-bit Callee address into a 64-bit one (x32 ABI).
    Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
  }

  // Returns a chain & a glue for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;

  if (!IsSibcall && isTailCall && !IsMustTail) {
    Chain = DAG.getCALLSEQ_END(Chain,
                               DAG.getIntPtrConstant(NumBytesToPop, dl, true),
                               DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
    InFlag = Chain.getValue(1);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (isTailCall)
    Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask = [&]() {
    auto AdaptedCC = CallConv;
    // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists), use
    // the X86_INTR calling convention: it has the same CSR mask (all
    // registers preserved).
    if (HasNCSR)
      AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
    // If NoCalleeSavedRegisters is requested, use GHC since it happens to
    // preserve no registers.
    if (CB && CB->hasFnAttr("no_callee_saved_registers"))
      AdaptedCC = (CallingConv::ID)CallingConv::GHC;
    return RegInfo->getCallPreservedMask(MF, AdaptedCC);
  }();
  assert(Mask && "Missing call preserved mask for calling convention");

  // If this is an invoke in a 32-bit function using a funclet-based
  // personality, assume the function clobbers all registers. If an exception
  // is thrown, the runtime will not restore CSRs.
  if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
    const Function &CallerFn = MF.getFunction();
    EHPersonality Pers =
        CallerFn.hasPersonalityFn()
            ? classifyEHPersonality(CallerFn.getPersonalityFn())
            : EHPersonality::Unknown;
    if (isFuncletEHPersonality(Pers))
      Mask = RegInfo->getNoPreservedMask();
  }

  // Define a new register mask from the existing mask.
  uint32_t *RegMask = nullptr;

  // In some calling conventions we need to remove the used physical registers
  // from the reg mask.
  if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

    // Allocate a new Reg Mask and copy Mask.
    RegMask = MF.allocateRegMask();
    unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
    memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);

    // Make sure all sub registers of the argument registers are reset
    // in the RegMask.
    for (auto const &RegPair : RegsToPass)
      for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));

    // Create the RegMask Operand according to our updated mask.
    Ops.push_back(DAG.getRegisterMask(RegMask));
  } else {
    // Create the RegMask Operand according to the static mask.
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  if (isTailCall) {
    // Liveouts should be computed from returns, not tail calls, so nothing
    // extra is added to the liveout set here.
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
  }

  if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
    // nocf_check indirect call under CET: emit a call without ENDBR tracking.
    Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
  } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
    // Calls with a "clang.arc.attachedcall" bundle are special: they are
    // expanded to the call, directly followed by a special marker sequence
    // and a call to an ObjC runtime function. CALL_RVMARKER carries the
    // information needed to emit that sequence.
    assert(!isTailCall &&
           "tail calls cannot be marked with clang.arc.attachedcall");
    assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");

    // Add a target constant selecting the ObjC runtime call, placed just
    // before the call target: 0 selects the "retain" variant, 1 the
    // "unsafeClaim" variant of the autoreleased-return-value runtime call.
    unsigned RuntimeCallType =
        objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
    Ops.insert(Ops.begin() + 1,
               DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
    Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
  } else {
    Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
  }

  InFlag = Chain.getValue(1);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));

  // Save heapallocsite metadata.
  if (CLI.CB)
    if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
      DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPop;
  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
                       DAG.getTarget().Options.GuaranteedTailCallOpt))
    NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
  else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
           !Subtarget.getTargetTriple().isOSMSVCRT() &&
           SR == StackStructReturn)
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    // For MSVC Win32 targets, the caller pops the hidden struct pointer.
    NumBytesForCalleeToPop = 4;
  else
    NumBytesForCalleeToPop = 0;  // Callee pops nothing.

  // Returns a glue for retval copy to use.
  if (!IsSibcall) {
    Chain = DAG.getCALLSEQ_END(Chain,
                               DAG.getIntPtrConstant(NumBytesToPop, dl, true),
                               DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
                                                     true),
                               InFlag, dl);
    InFlag = Chain.getValue(1);
  }

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, RegMask);
}
4683 | |
4684 | |
4685 | |
4686 | |
4687 | |
4688 | |
4689 | |
4690 | |
4691 | |
4692 | |
4693 | |
4694 | |
4695 | |
4696 | |
4697 | |
4698 | |
4699 | |
4700 | |
4701 | |
4702 | |
4703 | |
4704 | |
4705 | |
4706 | |
4707 | |
4708 | |
4709 | |
4710 | |
4711 | |
4712 | |
4713 | |
4714 | |
4715 | |
4716 | |
/// Round up StackSize so that, together with the return-address slot pushed
/// by the call instruction, the stack stays aligned to the target stack
/// alignment: alignTo(StackSize + SlotSize, Align) - SlotSize.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
                                               SelectionDAG &DAG) const {
  const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
  const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
  assert(StackSize % SlotSize == 0 &&
         "StackSize must be a multiple of SlotSize");
  // Include the return-address slot before aligning, then remove it again so
  // the result is the argument area size only.
  return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
}
4726 | |
4727 | |
4728 | |
/// Return true if the given stack call argument is already available at the
/// same position (offset Offset) in the caller's incoming argument stack
/// area, so a sibcall can reuse it without emitting a store.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
                         const X86InstrInfo *TII, const CCValAssign &VA) {
  unsigned Bytes = Arg.getValueSizeInBits() / 8;

  for (;;) {
    // Look through nodes that don't alter the bits of the incoming value.
    unsigned Op = Arg.getOpcode();
    if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
      Arg = Arg.getOperand(0);
      continue;
    }
    if (Op == ISD::TRUNCATE) {
      // A TRUNCATE of an AssertZext back to the asserted type is a no-op on
      // the value bits; look through it as well.
      const SDValue &TruncInput = Arg.getOperand(0);
      if (TruncInput.getOpcode() == ISD::AssertZext &&
          cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
              Arg.getValueType()) {
        Arg = TruncInput.getOperand(0);
        continue;
      }
    }
    break;
  }

  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    // The value was copied from a virtual register; trace the register back
    // to either a load from a stack slot or (for byval) an LEA of one.
    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR.isVirtual())
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      // Byval: accept an LEA of a frame index, which passes the slot's
      // address rather than its contents.
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
           Opcode == X86::LEA64_32r) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // A byval argument is passed in as a pointer but it's now being
      // dereferenced; the loaded value cannot stand in for the slot. e.g.
      //   define @foo(%struct.X* %A) {
      //     tail call @bar(%struct.X* byval %A)
      //   }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
    FI = FINode->getIndex();
    Bytes = Flags.getByValSize();
  } else
    return false;

  assert(FI != INT_MAX);
  // The slot must be one of the caller's fixed incoming-argument objects.
  if (!MFI.isFixedObjectIndex(FI))
    return false;

  // It must sit at exactly the required offset.
  if (Offset != MFI.getObjectOffset(FI))
    return false;

  // If this is not byval, check that the argument stack object is immutable.
  // inalloca and argument copy elision can create mutable argument stack
  // objects. Byval objects can be mutated, but a byval call intends to pass
  // the mutated memory.
  if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
    return false;

  if (VA.getLocVT().getFixedSizeInBits() >
      Arg.getValueSizeInBits().getFixedSize()) {
    // If the argument location is wider than the argument type, check that
    // any extension flags match.
    if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
        Flags.isSExt() != MFI.isObjectSExt(FI)) {
      return false;
    }
  }

  // Finally, the object must cover exactly the bytes being passed.
  return Bytes == MFI.getObjectSize(FI);
}
4821 | |
4822 | |
4823 | |
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function;
/// returns true only when the call can be turned into a tail call without
/// breaking the ABI contract between caller and callee.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
    bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();

  // If the function return type is x86_fp80 and the callee return type is
  // not, then the FP_EXTEND of the call result is not a nop. It's not safe to
  // perform a tailcall optimization here.
  if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
      CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;

  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes.
  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs
  // to emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // The caller must return the sret pointer in a register; a tail call
    // would skip that copy, so disable the optimization.
    return false;
  } else if (Subtarget.is32Bit() && IsCalleeStackStructRet)
    // On 32-bit non-MSVC targets the callee pops the hidden sret pointer
    // (see the NumBytesForCalleeToPop = 4 case in LowerCall), which is
    // incompatible with a sibcall.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
      if (!ArgLocs[i].isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the caller (and it's potentially
  // used by the callee), we can't fold it in.
  bool Unused = false;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (!Ins[i].Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
      CCValAssign &VA = RVLocs[i];
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = 0;

  // If the callee takes no arguments then go on to check the results match.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    // Allocate shadow area for Win64.
    if (IsCalleeWin64)
      CCInfo.AllocateStack(32, Align(8));

    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    StackArgsSize = CCInfo.getNextStackOffset();

    if (CCInfo.getNextStackOffset()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
        SDValue Arg = OutVals[i];
        ISD::ArgFlagsTy Flags = Outs[i].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled
    // after callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments, so watch out for those.
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop exactly the same amount.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}
5023 | |
5024 | FastISel * |
5025 | X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
5026 | const TargetLibraryInfo *libInfo) const { |
5027 | return X86::createFastISel(funcInfo, libInfo); |
5028 | } |
5029 | |
5030 | |
5031 | |
5032 | |
5033 | |
5034 | static bool MayFoldLoad(SDValue Op, bool AssumeSingleUse = false) { |
5035 | return (AssumeSingleUse || Op.hasOneUse()) && ISD::isNormalLoad(Op.getNode()); |
5036 | } |
5037 | |
5038 | static bool MayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, |
5039 | bool AssumeSingleUse = false) { |
5040 | if (!MayFoldLoad(Op, AssumeSingleUse)) |
5041 | return false; |
5042 | |
5043 | |
5044 | |
5045 | const LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op.getNode()); |
5046 | return !Ld->isVolatile() || |
5047 | Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits(); |
5048 | } |
5049 | |
5050 | static bool MayFoldIntoStore(SDValue Op) { |
5051 | return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); |
5052 | } |
5053 | |
5054 | static bool MayFoldIntoZeroExtend(SDValue Op) { |
5055 | if (Op.hasOneUse()) { |
5056 | unsigned Opcode = Op.getNode()->use_begin()->getOpcode(); |
5057 | return (ISD::ZERO_EXTEND == Opcode); |
5058 | } |
5059 | return false; |
5060 | } |
5061 | |
5062 | static bool isTargetShuffle(unsigned Opcode) { |
5063 | switch(Opcode) { |
5064 | default: return false; |
5065 | case X86ISD::BLENDI: |
5066 | case X86ISD::PSHUFB: |
5067 | case X86ISD::PSHUFD: |
5068 | case X86ISD::PSHUFHW: |
5069 | case X86ISD::PSHUFLW: |
5070 | case X86ISD::SHUFP: |
5071 | case X86ISD::INSERTPS: |
5072 | case X86ISD::EXTRQI: |
5073 | case X86ISD::INSERTQI: |
5074 | case X86ISD::VALIGN: |
5075 | case X86ISD::PALIGNR: |
5076 | case X86ISD::VSHLDQ: |
5077 | case X86ISD::VSRLDQ: |
5078 | case X86ISD::MOVLHPS: |
5079 | case X86ISD::MOVHLPS: |
5080 | case X86ISD::MOVSHDUP: |
5081 | case X86ISD::MOVSLDUP: |
5082 | case X86ISD::MOVDDUP: |
5083 | case X86ISD::MOVSS: |
5084 | case X86ISD::MOVSD: |
5085 | case X86ISD::MOVSH: |
5086 | case X86ISD::UNPCKL: |
5087 | case X86ISD::UNPCKH: |
5088 | case X86ISD::VBROADCAST: |
5089 | case X86ISD::VPERMILPI: |
5090 | case X86ISD::VPERMILPV: |
5091 | case X86ISD::VPERM2X128: |
5092 | case X86ISD::SHUF128: |
5093 | case X86ISD::VPERMIL2: |
5094 | case X86ISD::VPERMI: |
5095 | case X86ISD::VPPERM: |
5096 | case X86ISD::VPERMV: |
5097 | case X86ISD::VPERMV3: |
5098 | case X86ISD::VZEXT_MOVL: |
5099 | return true; |
5100 | } |
5101 | } |
5102 | |
5103 | static bool isTargetShuffleVariableMask(unsigned Opcode) { |
5104 | switch (Opcode) { |
5105 | default: return false; |
5106 | |
5107 | case X86ISD::PSHUFB: |
5108 | case X86ISD::VPERMILPV: |
5109 | case X86ISD::VPERMIL2: |
5110 | case X86ISD::VPPERM: |
5111 | case X86ISD::VPERMV: |
5112 | case X86ISD::VPERMV3: |
5113 | return true; |
5114 | |
5115 | case ISD::OR: |
5116 | case ISD::AND: |
5117 | case X86ISD::ANDNP: |
5118 | return true; |
5119 | } |
5120 | } |
5121 | |
5122 | static bool isTargetShuffleSplat(SDValue Op) { |
5123 | unsigned Opcode = Op.getOpcode(); |
5124 | if (Opcode == ISD::EXTRACT_SUBVECTOR) |
5125 | return isTargetShuffleSplat(Op.getOperand(0)); |
5126 | return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD; |
5127 | } |
5128 | |
5129 | SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { |
5130 | MachineFunction &MF = DAG.getMachineFunction(); |
5131 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
5132 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
5133 | int ReturnAddrIndex = FuncInfo->getRAIndex(); |
5134 | |
5135 | if (ReturnAddrIndex == 0) { |
5136 | |
5137 | unsigned SlotSize = RegInfo->getSlotSize(); |
5138 | ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, |
5139 | -(int64_t)SlotSize, |
5140 | false); |
5141 | FuncInfo->setRAIndex(ReturnAddrIndex); |
5142 | } |
5143 | |
5144 | return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); |
5145 | } |
5146 | |
5147 | bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, |
5148 | bool hasSymbolicDisplacement) { |
5149 | |
5150 | if (!isInt<32>(Offset)) |
5151 | return false; |
5152 | |
5153 | |
5154 | |
5155 | if (!hasSymbolicDisplacement) |
5156 | return true; |
5157 | |
5158 | |
5159 | if (M != CodeModel::Small && M != CodeModel::Kernel) |
5160 | return false; |
5161 | |
5162 | |
5163 | |
5164 | |
5165 | if (M == CodeModel::Small && Offset < 16*1024*1024) |
5166 | return true; |
5167 | |
5168 | |
5169 | |
5170 | |
5171 | if (M == CodeModel::Kernel && Offset >= 0) |
5172 | return true; |
5173 | |
5174 | return false; |
5175 | } |
5176 | |
5177 | |
5178 | |
5179 | bool X86::isCalleePop(CallingConv::ID CallingConv, |
5180 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { |
5181 | |
5182 | |
5183 | if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) |
5184 | return true; |
5185 | |
5186 | switch (CallingConv) { |
5187 | default: |
5188 | return false; |
5189 | case CallingConv::X86_StdCall: |
5190 | case CallingConv::X86_FastCall: |
5191 | case CallingConv::X86_ThisCall: |
5192 | case CallingConv::X86_VectorCall: |
5193 | return !is64Bit; |
5194 | } |
5195 | } |
5196 | |
5197 | |
5198 | static bool isX86CCSigned(unsigned X86CC) { |
5199 | switch (X86CC) { |
5200 | default: |
5201 | llvm_unreachable("Invalid integer condition!"); |
5202 | case X86::COND_E: |
5203 | case X86::COND_NE: |
5204 | case X86::COND_B: |
5205 | case X86::COND_A: |
5206 | case X86::COND_BE: |
5207 | case X86::COND_AE: |
5208 | return false; |
5209 | case X86::COND_G: |
5210 | case X86::COND_GE: |
5211 | case X86::COND_L: |
5212 | case X86::COND_LE: |
5213 | return true; |
5214 | } |
5215 | } |
5216 | |
5217 | static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { |
5218 | switch (SetCCOpcode) { |
5219 | default: llvm_unreachable("Invalid integer condition!"); |
5220 | case ISD::SETEQ: return X86::COND_E; |
5221 | case ISD::SETGT: return X86::COND_G; |
5222 | case ISD::SETGE: return X86::COND_GE; |
5223 | case ISD::SETLT: return X86::COND_L; |
5224 | case ISD::SETLE: return X86::COND_LE; |
5225 | case ISD::SETNE: return X86::COND_NE; |
5226 | case ISD::SETULT: return X86::COND_B; |
5227 | case ISD::SETUGT: return X86::COND_A; |
5228 | case ISD::SETULE: return X86::COND_BE; |
5229 | case ISD::SETUGE: return X86::COND_AE; |
5230 | } |
5231 | } |
5232 | |
5233 | |
5234 | |
5235 | |
/// Translate an ISD::CondCode into the X86 condition code to use, possibly
/// rewriting LHS/RHS (both in/out parameters) to a cheaper equivalent
/// comparison. \p isFP selects integer vs floating-point translation.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
                                    bool isFP, SDValue &LHS, SDValue &RHS,
                                    SelectionDAG &DAG) {
  if (!isFP) {
    // Integer compares against special constants can use sign-flag tests.
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return X86::COND_NS;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        return X86::COND_S;
      }
      if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
        // X >= 0   -> X == 0, jump on !sign.
        return X86::COND_NS;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return X86::COND_LE;
      }
    }

    return TranslateIntegerX86CC(SetCCOpcode);
  }

  // Floating point: first decide if it is required or profitable to flip the
  // operands.

  // If LHS is a foldable load but RHS is not, commute so the load ends up on
  // the RHS where it can be folded into the compare.
  if (ISD::isNON_EXTLoad(LHS.getNode()) &&
      !ISD::isNON_EXTLoad(RHS.getNode())) {
    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
    std::swap(LHS, RHS);
  }

  // These condcodes map to X86 flags only after swapping the operands (the
  // condcode itself is handled in the swapped form by the switch below).
  switch (SetCCOpcode) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    std::swap(LHS, RHS);
    break;
  }

  // FP compares set the flags such that the unsigned-compare condition codes
  // (A/AE/B/BE) implement the ordered/unordered semantics we need; ordered-
  // and unordered-variants collapse onto the same X86 condition in several
  // cases. SETOEQ/SETUNE need two flag tests, so they return COND_INVALID
  // and the caller must expand them.
  switch (SetCCOpcode) {
  default: llvm_unreachable("Condcode should be pre-legalized away");
  case ISD::SETUEQ:
  case ISD::SETEQ: return X86::COND_E;
  case ISD::SETOLT:              // flipped above: handled as OGT
  case ISD::SETOGT:
  case ISD::SETGT: return X86::COND_A;
  case ISD::SETOLE:              // flipped above: handled as OGE
  case ISD::SETOGE:
  case ISD::SETGE: return X86::COND_AE;
  case ISD::SETUGT:              // flipped above: handled as ULT
  case ISD::SETULT:
  case ISD::SETLT: return X86::COND_B;
  case ISD::SETUGE:              // flipped above: handled as ULE
  case ISD::SETULE:
  case ISD::SETLE: return X86::COND_BE;
  case ISD::SETONE:
  case ISD::SETNE: return X86::COND_NE;
  case ISD::SETUO: return X86::COND_P;
  case ISD::SETO:  return X86::COND_NP;
  case ISD::SETOEQ:
  case ISD::SETUNE: return X86::COND_INVALID;
  }
}
5313 | |
5314 | |
5315 | |
5316 | |
5317 | static bool hasFPCMov(unsigned X86CC) { |
5318 | switch (X86CC) { |
5319 | default: |
5320 | return false; |
5321 | case X86::COND_B: |
5322 | case X86::COND_BE: |
5323 | case X86::COND_E: |
5324 | case X86::COND_P: |
5325 | case X86::COND_A: |
5326 | case X86::COND_AE: |
5327 | case X86::COND_NE: |
5328 | case X86::COND_NP: |
5329 | return true; |
5330 | } |
5331 | } |
5332 | |
5333 | |
/// Fill \p Info with the memory-operand description (opcode, pointer value,
/// memory VT, alignment, load/store flags) for a memory-touching x86
/// intrinsic. Returns false if the intrinsic needs no memory operand.
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  Info.flags = MachineMemOperand::MONone;
  Info.offset = 0;

  const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
  if (!IntrData) {
    // Not in the chained-intrinsic table: handle the Key Locker AES
    // intrinsics, which read their key handle from memory.
    switch (Intrinsic) {
    case Intrinsic::x86_aesenc128kl:
    case Intrinsic::x86_aesdec128kl:
      // Single-block 128-bit Key Locker op: handle pointer is operand 1.
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(1);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    case Intrinsic::x86_aesenc256kl:
    case Intrinsic::x86_aesdec256kl:
      // Single-block 256-bit Key Locker op: larger handle, same shape.
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(1);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    case Intrinsic::x86_aesencwide128kl:
    case Intrinsic::x86_aesdecwide128kl:
      // Wide (8-block) variant: handle pointer is operand 0.
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(0);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    case Intrinsic::x86_aesencwide256kl:
    case Intrinsic::x86_aesdecwide256kl:
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(0);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    }
    return false;
  }

  switch (IntrData->Type) {
  case TRUNCATE_TO_MEM_VI8:
  case TRUNCATE_TO_MEM_VI16:
  case TRUNCATE_TO_MEM_VI32: {
    // Truncating vector store: memory VT has the narrowed element type with
    // the source's element count.
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = I.getArgOperand(0);
    MVT VT  = MVT::getVT(I.getArgOperand(1)->getType());
    MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
    if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
      ScalarVT = MVT::i8;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
      ScalarVT = MVT::i16;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
      ScalarVT = MVT::i32;

    Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOStore;
    break;
  }
  case GATHER:
  case GATHER_AVX2: {
    // Gather load: no single pointer value; element count is limited by
    // whichever of the data and index vectors is shorter.
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = nullptr;
    MVT DataVT = MVT::getVT(I.getType());
    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
                                IndexVT.getVectorNumElements());
    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOLoad;
    break;
  }
  case SCATTER: {
    // Scatter store: mirrors the gather case with the data in operand 3.
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = nullptr;
    MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
                                IndexVT.getVectorNumElements());
    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOStore;
    break;
  }
  default:
    return false;
  }

  return true;
}
5431 | |
5432 | |
5433 | |
5434 | |
5435 | bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
5436 | bool ForCodeSize) const { |
5437 | for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) { |
5438 | if (Imm.bitwiseIsEqual(LegalFPImmediates[i])) |
5439 | return true; |
5440 | } |
5441 | return false; |
5442 | } |
5443 | |
/// Return true if it is profitable to narrow this load to NewVT.
bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
                                              ISD::LoadExtType ExtTy,
                                              EVT NewVT) const {
  assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");

  // Don't shrink loads through a RIP-relative GOTTPOFF reference: TLS
  // handling requires the full-width (movq/addq-compatible) access.
  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
  if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
    if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
      return GA->getTargetFlags() != X86II::MO_GOTTPOFF;

  // If this is a wide (256/512-bit) vector load with multiple uses and every
  // value use is an extract_subvector feeding directly into a store, then the
  // extract + store pairs can be store-folded, so splitting the load is
  // probably not worthwhile.
  EVT VT = Load->getValueType(0);
  if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
    for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
      // Skip uses of the chain result; only result 0 carries the load value.
      if (UI.getUse().getResNo() != 0)
        continue;

      // Any use that is not "extract_subvector with a single store user"
      // makes narrowing worthwhile after all.
      if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
          UI->use_begin()->getOpcode() != ISD::STORE)
        return true;
    }

    // All value uses are extract + store: keep the wide load.
    return false;
  }

  return true;
}
5477 | |
5478 | |
5479 | |
5480 | bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
5481 | Type *Ty) const { |
5482 | assert(Ty->isIntegerTy()); |
5483 | |
5484 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
5485 | if (BitSize == 0 || BitSize > 64) |
5486 | return false; |
5487 | return true; |
5488 | } |
5489 | |
5490 | bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { |
5491 | |
5492 | |
5493 | |
5494 | |
5495 | bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128; |
5496 | return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX(); |
5497 | } |
5498 | |
5499 | bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { |
5500 | |
5501 | |
5502 | if (VT.isVector() && Subtarget.hasAVX512()) |
5503 | return false; |
5504 | |
5505 | return true; |
5506 | } |
5507 | |
/// Return true if multiplying by the splat constant \p C is better done as
/// shift + add/sub (only vector splats are handled here).
bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                               SDValue C) const {
  // Only vector splat constants are considered; scalars are handled
  // elsewhere.
  APInt MulC;
  if (!ISD::isConstantSplatVector(C.getNode(), MulC))
    return false;

  // Find the type VT will be legalized to, so we judge MUL legality on the
  // type that will actually be selected rather than prematurely decomposing
  // and then having to legalize the shifts/adds too.
  while (getTypeAction(Context, VT) != TypeLegal)
    VT = getTypeToTransformTo(Context, VT);

  // If a vector multiply of the legalized type is directly legal, assume it
  // beats a shl + add/sub sequence.
  if (isOperationLegal(ISD::MUL, VT))
    return false;

  // Otherwise decompose only the cheap patterns:
  // shl+sub (2^N - 1), shl+add (2^N + 1), and their negated forms.
  return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
         (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
}
5536 | |
5537 | bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
5538 | unsigned Index) const { |
5539 | if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) |
5540 | return false; |
5541 | |
5542 | |
5543 | |
5544 | if (ResVT.getVectorElementType() == MVT::i1) |
5545 | return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) && |
5546 | (Index == ResVT.getVectorNumElements())); |
5547 | |
5548 | return (Index % ResVT.getVectorNumElements()) == 0; |
5549 | } |
5550 | |
5551 | bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const { |
5552 | unsigned Opc = VecOp.getOpcode(); |
5553 | |
5554 | |
5555 | |
5556 | if (Opc >= ISD::BUILTIN_OP_END) |
5557 | return false; |
5558 | |
5559 | |
5560 | EVT VecVT = VecOp.getValueType(); |
5561 | if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) |
5562 | return true; |
5563 | |
5564 | |
5565 | |
5566 | EVT ScalarVT = VecVT.getScalarType(); |
5567 | return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); |
5568 | } |
5569 | |
5570 | bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT, |
5571 | bool) const { |
5572 | |
5573 | if (VT.isVector()) |
5574 | return false; |
5575 | return VT.isSimple() || !isOperationExpand(Opcode, VT); |
5576 | } |
5577 | |
5578 | bool X86TargetLowering::isCheapToSpeculateCttz() const { |
5579 | |
5580 | return Subtarget.hasBMI(); |
5581 | } |
5582 | |
5583 | bool X86TargetLowering::isCheapToSpeculateCtlz() const { |
5584 | |
5585 | return Subtarget.hasLZCNT(); |
5586 | } |
5587 | |
5588 | bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
5589 | const SelectionDAG &DAG, |
5590 | const MachineMemOperand &MMO) const { |
5591 | if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() && |
5592 | BitcastVT.getVectorElementType() == MVT::i1) |
5593 | return false; |
5594 | |
5595 | if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8) |
5596 | return false; |
5597 | |
5598 | |
5599 | if (LoadVT.isVector() && BitcastVT.isVector() && |
5600 | isTypeLegal(LoadVT) && isTypeLegal(BitcastVT)) |
5601 | return true; |
5602 | |
5603 | return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO); |
5604 | } |
5605 | |
5606 | bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
5607 | const MachineFunction &MF) const { |
5608 | |
5609 | |
5610 | bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat); |
5611 | |
5612 | if (NoFloat) { |
5613 | unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32; |
5614 | return (MemVT.getSizeInBits() <= MaxIntSize); |
5615 | } |
5616 | |
5617 | |
5618 | if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth()) |
5619 | return false; |
5620 | |
5621 | return true; |
5622 | } |
5623 | |
5624 | bool X86TargetLowering::isCtlzFast() const { |
5625 | return Subtarget.hasFastLZCNT(); |
5626 | } |
5627 | |
5628 | bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial( |
5629 | const Instruction &AndI) const { |
5630 | return true; |
5631 | } |
5632 | |
5633 | bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { |
5634 | EVT VT = Y.getValueType(); |
5635 | |
5636 | if (VT.isVector()) |
5637 | return false; |
5638 | |
5639 | if (!Subtarget.hasBMI()) |
5640 | return false; |
5641 | |
5642 | |
5643 | if (VT != MVT::i32 && VT != MVT::i64) |
5644 | return false; |
5645 | |
5646 | return !isa<ConstantSDNode>(Y); |
5647 | } |
5648 | |
5649 | bool X86TargetLowering::hasAndNot(SDValue Y) const { |
5650 | EVT VT = Y.getValueType(); |
5651 | |
5652 | if (!VT.isVector()) |
5653 | return hasAndNotCompare(Y); |
5654 | |
5655 | |
5656 | |
5657 | if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128) |
5658 | return false; |
5659 | |
5660 | if (VT == MVT::v4i32) |
5661 | return true; |
5662 | |
5663 | return Subtarget.hasSSE2(); |
5664 | } |
5665 | |
5666 | bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const { |
5667 | return X.getValueType().isScalarInteger(); |
5668 | } |
5669 | |
5670 | bool X86TargetLowering:: |
5671 | shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
5672 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
5673 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
5674 | SelectionDAG &DAG) const { |
5675 | |
5676 | if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
5677 | X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) |
5678 | return false; |
5679 | |
5680 | if (X.getValueType().isScalarInteger()) |
5681 | return true; |
5682 | |
5683 | |
5684 | if (DAG.isSplatValue(Y, true)) |
5685 | return true; |
5686 | |
5687 | if (Subtarget.hasAVX2()) |
5688 | return true; |
5689 | |
5690 | return NewShiftOpcode == ISD::SHL; |
5691 | } |
5692 | |
5693 | bool X86TargetLowering::shouldFoldConstantShiftPairToMask( |
5694 | const SDNode *N, CombineLevel Level) const { |
5695 | assert(((N->getOpcode() == ISD::SHL && |
5696 | N->getOperand(0).getOpcode() == ISD::SRL) || |
5697 | (N->getOpcode() == ISD::SRL && |
5698 | N->getOperand(0).getOpcode() == ISD::SHL)) && |
5699 | "Expected shift-shift mask"); |
5700 | EVT VT = N->getValueType(0); |
5701 | if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) || |
5702 | (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) { |
5703 | |
5704 | |
5705 | |
5706 | return N->getOperand(1) == N->getOperand(0).getOperand(1); |
5707 | } |
5708 | return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level); |
5709 | } |
5710 | |
5711 | bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { |
5712 | EVT VT = Y.getValueType(); |
5713 | |
5714 | |
5715 | if (VT.isVector()) |
5716 | return false; |
5717 | |
5718 | |
5719 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
5720 | return false; |
5721 | |
5722 | return true; |
5723 | } |
5724 | |
5725 | bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, |
5726 | SDNode *N) const { |
5727 | if (DAG.getMachineFunction().getFunction().hasMinSize() && |
5728 | !Subtarget.isOSWindows()) |
5729 | return false; |
5730 | return true; |
5731 | } |
5732 | |
5733 | bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { |
5734 | |
5735 | |
5736 | return isTypeLegal(VT); |
5737 | } |
5738 | |
5739 | MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const { |
5740 | MVT VT = MVT::getIntegerVT(NumBits); |
5741 | if (isTypeLegal(VT)) |
5742 | return VT; |
5743 | |
5744 | |
5745 | if (NumBits == 128 && isTypeLegal(MVT::v16i8)) |
5746 | return MVT::v16i8; |
5747 | |
5748 | |
5749 | if (NumBits == 256 && isTypeLegal(MVT::v32i8)) |
5750 | return MVT::v32i8; |
5751 | |
5752 | |
5753 | |
5754 | |
5755 | |
5756 | return MVT::INVALID_SIMPLE_VALUE_TYPE; |
5757 | } |
5758 | |
5759 | |
5760 | static bool isUndefOrEqual(int Val, int CmpVal) { |
5761 | return ((Val == SM_SentinelUndef) || (Val == CmpVal)); |
5762 | } |
5763 | |
5764 | |
5765 | |
5766 | static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) { |
5767 | return llvm::all_of(Mask, [CmpVal](int M) { |
5768 | return (M == SM_SentinelUndef) || (M == CmpVal); |
5769 | }); |
5770 | } |
5771 | |
5772 | |
5773 | static bool isUndefOrZero(int Val) { |
5774 | return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero)); |
5775 | } |
5776 | |
5777 | |
5778 | |
5779 | static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) { |
5780 | return llvm::all_of(Mask.slice(Pos, Size), |
5781 | [](int M) { return M == SM_SentinelUndef; }); |
5782 | } |
5783 | |
5784 | |
5785 | static bool isUndefLowerHalf(ArrayRef<int> Mask) { |
5786 | unsigned NumElts = Mask.size(); |
5787 | return isUndefInRange(Mask, 0, NumElts / 2); |
5788 | } |
5789 | |
5790 | |
5791 | static bool isUndefUpperHalf(ArrayRef<int> Mask) { |
5792 | unsigned NumElts = Mask.size(); |
5793 | return isUndefInRange(Mask, NumElts / 2, NumElts / 2); |
5794 | } |
5795 | |
5796 | |
/// Return true if \p Val lies in the half-open range [Low, Hi).
static bool isInRange(int Val, int Low, int Hi) {
  return Low <= Val && Val < Hi;
}
5800 | |
5801 | |
5802 | |
5803 | static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5804 | return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); }); |
5805 | } |
5806 | |
5807 | |
5808 | static bool isAnyZero(ArrayRef<int> Mask) { |
5809 | return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); |
5810 | } |
5811 | |
5812 | |
5813 | |
5814 | static bool isAnyZeroOrUndef(ArrayRef<int> Mask) { |
5815 | return llvm::any_of(Mask, [](int M) { |
5816 | return M == SM_SentinelZero || M == SM_SentinelUndef; |
5817 | }); |
5818 | } |
5819 | |
5820 | |
5821 | |
5822 | static bool isUndefOrInRange(int Val, int Low, int Hi) { |
5823 | return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi); |
5824 | } |
5825 | |
5826 | |
5827 | |
5828 | static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5829 | return llvm::all_of( |
5830 | Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); }); |
5831 | } |
5832 | |
5833 | |
5834 | |
5835 | static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { |
5836 | return isUndefOrZero(Val) || isInRange(Val, Low, Hi); |
5837 | } |
5838 | |
5839 | |
5840 | |
5841 | static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5842 | return llvm::all_of( |
5843 | Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); }); |
5844 | } |
5845 | |
5846 | |
5847 | |
5848 | |
5849 | static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos, |
5850 | unsigned Size, int Low, int Step = 1) { |
5851 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
5852 | if (!isUndefOrEqual(Mask[i], Low)) |
5853 | return false; |
5854 | return true; |
5855 | } |
5856 | |
5857 | |
5858 | |
5859 | |
5860 | static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
5861 | unsigned Size, int Low, |
5862 | int Step = 1) { |
5863 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
5864 | if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) |
5865 | return false; |
5866 | return true; |
5867 | } |
5868 | |
5869 | |
5870 | |
5871 | static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
5872 | unsigned Size) { |
5873 | return llvm::all_of(Mask.slice(Pos, Size), |
5874 | [](int M) { return isUndefOrZero(M); }); |
5875 | } |
5876 | |
5877 | |
5878 | |
5879 | |
5880 | |
5881 | |
5882 | |
5883 | |
5884 | |
5885 | |
5886 | static bool canWidenShuffleElements(ArrayRef<int> Mask, |
5887 | SmallVectorImpl<int> &WidenedMask) { |
5888 | WidenedMask.assign(Mask.size() / 2, 0); |
5889 | for (int i = 0, Size = Mask.size(); i < Size; i += 2) { |
5890 | int M0 = Mask[i]; |
5891 | int M1 = Mask[i + 1]; |
5892 | |
5893 | |
5894 | if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) { |
5895 | WidenedMask[i / 2] = SM_SentinelUndef; |
5896 | continue; |
5897 | } |
5898 | |
5899 | |
5900 | |
5901 | if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) { |
5902 | WidenedMask[i / 2] = M1 / 2; |
5903 | continue; |
5904 | } |
5905 | if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) { |
5906 | WidenedMask[i / 2] = M0 / 2; |
5907 | continue; |
5908 | } |
5909 | |
5910 | |
5911 | if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) { |
5912 | if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) && |
5913 | (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) { |
5914 | WidenedMask[i / 2] = SM_SentinelZero; |
5915 | continue; |
5916 | } |
5917 | return false; |
5918 | } |
5919 | |
5920 | |
5921 | |
5922 | if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) { |
5923 | WidenedMask[i / 2] = M0 / 2; |
5924 | continue; |
5925 | } |
5926 | |
5927 | |
5928 | return false; |
5929 | } |
5930 | assert(WidenedMask.size() == Mask.size() / 2 && |
5931 | "Incorrect size of mask after widening the elements!"); |
5932 | |
5933 | return true; |
5934 | } |
5935 | |
/// As canWidenShuffleElements(Mask, WidenedMask), but first rewrites
/// elements known to be zeroable (when V2 is an all-zeros vector) to the
/// zero sentinel so they don't block widening.
static bool canWidenShuffleElements(ArrayRef<int> Mask,
                                    const APInt &Zeroable,
                                    bool V2IsZero,
                                    SmallVectorImpl<int> &WidenedMask) {
  // Create an alternative mask marking zeroable elements; undef elements are
  // deliberately left as undef rather than zero.
  SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
  if (V2IsZero) {
    assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
    for (int i = 0, Size = Mask.size(); i != Size; ++i)
      if (Mask[i] != SM_SentinelUndef && Zeroable[i])
        ZeroableMask[i] = SM_SentinelZero;
  }
  return canWidenShuffleElements(ZeroableMask, WidenedMask);
}
5951 | |
5952 | static bool canWidenShuffleElements(ArrayRef<int> Mask) { |
5953 | SmallVector<int, 32> WidenedMask; |
5954 | return canWidenShuffleElements(Mask, WidenedMask); |
5955 | } |
5956 | |
5957 | |
5958 | |
/// Rescale a shuffle mask to exactly \p NumDstElts elements (the ratio must
/// be a whole number in either direction). Narrowing always succeeds;
/// widening succeeds only if every step of pairwise widening is possible.
static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
                                 SmallVectorImpl<int> &ScaledMask) {
  unsigned NumSrcElts = Mask.size();
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Illegal shuffle scale factor");

  // Narrowing (more, smaller elements) is always viable.
  if (NumDstElts >= NumSrcElts) {
    int Scale = NumDstElts / NumSrcElts;
    llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
    return true;
  }

  // Widening: halve the mask repeatedly until the target size is reached;
  // the first step seeds ScaledMask.
  if (canWidenShuffleElements(Mask, ScaledMask)) {
    while (ScaledMask.size() > NumDstElts) {
      SmallVector<int, 16> WidenedMask;
      if (!canWidenShuffleElements(ScaledMask, WidenedMask))
        return false;
      ScaledMask = std::move(WidenedMask);
    }
    return true;
  }

  return false;
}
5986 | |
5987 | |
5988 | bool X86::isZeroNode(SDValue Elt) { |
5989 | return isNullConstant(Elt) || isNullFPConstant(Elt); |
5990 | } |
5991 | |
5992 | |
5993 | |
5994 | |
/// Build a constant vector of type \p VT from \p Values. When \p IsMask is
/// set, negative values become UNDEF elements. If i64 is not legal (32-bit
/// mode) an i64 element vector is built as twice as many i32 elements
/// (value in the low half, zero in the high half) and bitcast back.
static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
                              const SDLoc &dl, bool IsMask = false) {

  SmallVector<SDValue, 32> Ops;
  bool Split = false;

  MVT ConstVecVT = VT;
  unsigned NumElts = VT.getVectorNumElements();
  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
    // Split each i64 element into a pair of i32s.
    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
    Split = true;
  }

  MVT EltVT = ConstVecVT.getVectorElementType();
  for (unsigned i = 0; i < NumElts; ++i) {
    // In mask mode, a negative value means "undef element".
    bool IsUndef = Values[i] < 0 && IsMask;
    SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
      DAG.getConstant(Values[i], dl, EltVT);
    Ops.push_back(OpNode);
    if (Split)
      // High 32 bits of the split i64: zero (or undef to match).
      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
                    DAG.getConstant(0, dl, EltVT));
  }
  SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
  if (Split)
    ConstsNode = DAG.getBitcast(VT, ConstsNode);
  return ConstsNode;
}
6024 | |
/// Build a constant BUILD_VECTOR of type VT from raw element bits. Undefs
/// marks which elements become UNDEF; every APInt in Bits must match VT's
/// scalar bit width.
static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
                              MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
  assert(Bits.size() == Undefs.getBitWidth() &&
         "Unequal constant and undef arrays");
  SmallVector<SDValue, 32> Ops;
  bool Split = false;

  MVT ConstVecVT = VT;
  unsigned NumElts = VT.getVectorNumElements();
  // If i64 isn't legal on this target, emit each i64 element as two i32
  // constants and bitcast the result back to VT.
  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
    Split = true;
  }

  MVT EltVT = ConstVecVT.getVectorElementType();
  for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
    if (Undefs[i]) {
      // Undef element: one UNDEF operand (two if split into i32 halves).
      Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
      continue;
    }
    const APInt &V = Bits[i];
    assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
    if (Split) {
      // Low 32 bits, then high 32 bits (little-endian element layout).
      Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
      Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
    } else if (EltVT == MVT::f32) {
      // Reinterpret the raw bits as a single-precision FP constant.
      APFloat FV(APFloat::IEEEsingle(), V);
      Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
    } else if (EltVT == MVT::f64) {
      // Reinterpret the raw bits as a double-precision FP constant.
      APFloat FV(APFloat::IEEEdouble(), V);
      Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
    } else {
      Ops.push_back(DAG.getConstant(V, dl, EltVT));
    }
  }

  SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
  return DAG.getBitcast(VT, ConstsNode);
}
6065 | |
6066 | |
/// Return a vector of the given type with all elements zero, built in a
/// canonical element type and bitcast to VT.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG, const SDLoc &dl) {
  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
          VT.getVectorElementType() == MVT::i1) &&
         "Unexpected vector type");

  SDValue Vec;
  if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
    // SSE1-only targets: no 128-bit integer vectors, so use v4f32 +0.0.
    Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
  } else if (VT.isFloatingPoint()) {
    Vec = DAG.getConstantFP(+0.0, dl, VT);
  } else if (VT.getVectorElementType() == MVT::i1) {
    // AVX-512 mask vector of zeroes.
    assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
           "Unexpected vector type");
    Vec = DAG.getConstant(0, dl, VT);
  } else {
    // Canonicalize integer zero vectors to vNi32 before the bitcast.
    unsigned Num32BitElts = VT.getSizeInBits() / 32;
    Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
  }
  return DAG.getBitcast(VT, Vec);
}
6091 | |
/// Extract the `vectorWidth`-bit subvector of Vec containing element IdxVal;
/// the index is rounded down to a subvector boundary.
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                const SDLoc &dl, unsigned vectorWidth) {
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits() / vectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Number of elements in one vectorWidth-bit chunk.
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // Align the index down to the start of the chunk containing it.
  IdxVal &= ~(ElemsPerChunk - 1);

  // Extracting from a BUILD_VECTOR: just take the relevant slice of operands.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getBuildVector(ResultVT, dl,
                              Vec->ops().slice(IdxVal, ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
6116 | |
6117 | |
6118 | |
6119 | |
6120 | |
6121 | |
6122 | |
6123 | static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal, |
6124 | SelectionDAG &DAG, const SDLoc &dl) { |
6125 | assert((Vec.getValueType().is256BitVector() || |
6126 | Vec.getValueType().is512BitVector()) && "Unexpected vector size!"); |
6127 | return extractSubVector(Vec, IdxVal, DAG, dl, 128); |
6128 | } |
6129 | |
6130 | |
6131 | static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal, |
6132 | SelectionDAG &DAG, const SDLoc &dl) { |
6133 | assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!"); |
6134 | return extractSubVector(Vec, IdxVal, DAG, dl, 256); |
6135 | } |
6136 | |
/// Insert Vec (a 128/256-bit vector) into Result at element index IdxVal;
/// the index is rounded down to a subvector boundary.
static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                               SelectionDAG &DAG, const SDLoc &dl,
                               unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");
  // Inserting UNDEF is a no-op.
  if (Vec.isUndef())
    return Result;
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  EVT ResultVT = Result.getValueType();

  // Number of elements in one vectorWidth-bit chunk.
  unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // Align the index down to the start of the chunk containing it.
  IdxVal &= ~(ElemsPerChunk - 1);

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
6160 | |
6161 | |
6162 | |
6163 | |
6164 | |
6165 | |
6166 | |
6167 | static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, |
6168 | SelectionDAG &DAG, const SDLoc &dl) { |
6169 | assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); |
6170 | return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128); |
6171 | } |
6172 | |
6173 | |
6174 | |
/// Widen Vec into the low elements of the larger type VT (same scalar type).
/// The new upper elements are zeroed when ZeroNewElements is set; otherwise
/// they are left undef.
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
                              const X86Subtarget &Subtarget, SelectionDAG &DAG,
                              const SDLoc &dl) {
  assert(Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() &&
         Vec.getValueType().getScalarType() == VT.getScalarType() &&
         "Unsupported vector widening type");
  SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
                                : DAG.getUNDEF(VT);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
                     DAG.getIntPtrConstant(0, dl));
}
6186 | |
6187 | |
6188 | |
6189 | static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements, |
6190 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
6191 | const SDLoc &dl, unsigned WideSizeInBits) { |
6192 | assert(Vec.getValueSizeInBits() < WideSizeInBits && |
6193 | (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && |
6194 | "Unsupported vector widening type"); |
6195 | unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits(); |
6196 | MVT SVT = Vec.getSimpleValueType().getScalarType(); |
6197 | MVT VT = MVT::getVectorVT(SVT, WideNumElts); |
6198 | return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl); |
6199 | } |
6200 | |
6201 | |
6202 | |
6203 | |
/// If N can be viewed as a concatenation of subvectors, collect the pieces in
/// Ops and return true. Handles an explicit CONCAT_VECTORS as well as an
/// INSERT_SUBVECTOR of an upper half over a matching lower half.
static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
  assert(Ops.empty() && "Expected an empty ops vector");

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    Ops.append(N->op_begin(), N->op_end());
    return true;
  }

  if (N->getOpcode() == ISD::INSERT_SUBVECTOR) {
    SDValue Src = N->getOperand(0);
    SDValue Sub = N->getOperand(1);
    const APInt &Idx = N->getConstantOperandAPInt(2);
    EVT VT = Src.getValueType();
    EVT SubVT = Sub.getValueType();

    // Only handle insertion of the upper half of a double-width vector.
    if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
        Idx == (VT.getVectorNumElements() / 2)) {
      // insert_subvector(insert_subvector(x, lo, 0), hi, half) -> {lo, hi}.
      if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
          Src.getOperand(1).getValueType() == SubVT &&
          isNullConstant(Src.getOperand(2))) {
        Ops.push_back(Src.getOperand(1));
        Ops.push_back(Sub);
        return true;
      }
      // insert_subvector(x, extract_subvector(x, 0), half) -> {lo, lo}:
      // the low half is duplicated into the high half.
      if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
          Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
        Ops.append(2, Sub);
        return true;
      }
    }
  }

  return false;
}
6241 | |
6242 | static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG, |
6243 | const SDLoc &dl) { |
6244 | EVT VT = Op.getValueType(); |
6245 | unsigned NumElems = VT.getVectorNumElements(); |
6246 | unsigned SizeInBits = VT.getSizeInBits(); |
6247 | assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 && |
6248 | "Can't split odd sized vector"); |
6249 | |
6250 | SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2); |
6251 | SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2); |
6252 | return std::make_pair(Lo, Hi); |
6253 | } |
6254 | |
6255 | |
/// Break a 256/512-bit vector unary op into two half-width ops and
/// re-concatenate the results.
static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();

  // Source and destination must both be 256/512-bit with matching element
  // counts.
  assert((Op.getOperand(0).getValueType().is256BitVector() ||
          Op.getOperand(0).getValueType().is512BitVector()) &&
         (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
  assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
         VT.getVectorNumElements() &&
         "Unexpected VTs!");

  SDLoc dl(Op);

  // Extract the low/high halves of the source operand.
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl);

  // Apply the opcode to each half and concatenate.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                     DAG.getNode(Op.getOpcode(), dl, LoVT, Lo),
                     DAG.getNode(Op.getOpcode(), dl, HiVT, Hi));
}
6280 | |
6281 | |
6282 | |
/// Break a 256/512-bit vector binary op into two half-width ops and
/// re-concatenate the results.
static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();

  // Both operands must match the result type.
  assert(Op.getOperand(0).getValueType() == VT &&
         Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
  assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");

  SDLoc dl(Op);

  // Extract the LHS low/high halves.
  SDValue LHS1, LHS2;
  std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);

  // Extract the RHS low/high halves.
  SDValue RHS1, RHS2;
  std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);

  // Apply the opcode to each pair of halves and concatenate.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                     DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1),
                     DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2));
}
6307 | |
6308 | |
6309 | |
6310 | |
6311 | |
6312 | |
6313 | |
6314 | |
/// Split each operand of Ops into pieces no wider than the subtarget's widest
/// usable vector register (128/256/512 bits), apply Builder to each group of
/// pieces, and concatenate the per-piece results back into VT. When CheckBWI
/// is set, 512-bit pieces additionally require BWI registers.
template <typename F>
SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
                         const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
                         F Builder, bool CheckBWI = true) {
  assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
  // Pick the number of pieces based on the widest register class available.
  unsigned NumSubs = 1;
  if ((CheckBWI && Subtarget.useBWIRegs()) ||
      (!CheckBWI && Subtarget.useAVX512Regs())) {
    if (VT.getSizeInBits() > 512) {
      NumSubs = VT.getSizeInBits() / 512;
      assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
    }
  } else if (Subtarget.hasAVX2()) {
    if (VT.getSizeInBits() > 256) {
      NumSubs = VT.getSizeInBits() / 256;
      assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
    }
  } else {
    if (VT.getSizeInBits() > 128) {
      NumSubs = VT.getSizeInBits() / 128;
      assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
    }
  }

  // The whole type fits in a register: build the op directly.
  if (NumSubs == 1)
    return Builder(DAG, DL, Ops);

  // Otherwise extract the i'th subvector of every operand, apply Builder to
  // each group, and concatenate the results.
  SmallVector<SDValue, 4> Subs;
  for (unsigned i = 0; i != NumSubs; ++i) {
    SmallVector<SDValue, 2> SubOps;
    for (SDValue Op : Ops) {
      EVT OpVT = Op.getValueType();
      unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
      unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
      SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
    }
    Subs.push_back(Builder(DAG, DL, SubOps));
  }
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
}
6355 | |
6356 | |
/// Lower INSERT_SUBVECTOR for vXi1 (AVX-512 mask) vectors using mask shifts
/// (KSHIFTL/KSHIFTR) and logic ops, widening to a legal mask width where
/// required since there is no direct mask-insert instruction.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {

  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  unsigned IdxVal = Op.getConstantOperandVal(2);

  // Inserting undef is a no-op.
  if (SubVec.isUndef())
    return Vec;

  // Inserting at index 0 into an undef vector needs no rewriting.
  if (IdxVal == 0 && Vec.isUndef())
    return Op;

  MVT OpVT = Op.getSimpleValueType();
  unsigned NumElems = OpVT.getVectorNumElements();
  SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);

  // Widen narrow mask types: sub-8-element masks, and 8-element masks
  // without DQI, are handled in v8i1/v16i1.
  MVT WideOpVT = OpVT;
  if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
    WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;

  // Inserting into the low elements of a zero vector can be expressed
  // directly as an insert into a wide zero vector.
  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
    Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                     DAG.getConstant(0, dl, WideOpVT),
                     SubVec, Idx);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  MVT SubVecVT = SubVec.getSimpleValueType();
  unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
  assert(IdxVal + SubVecNumElems <= NumElems &&
         IdxVal % SubVecVT.getSizeInBits() == 0 &&
         "Unexpected index value in INSERT_SUBVECTOR");

  SDValue Undef = DAG.getUNDEF(WideOpVT);

  if (IdxVal == 0) {
    // Zero the low SubVecNumElems lanes of Vec with a right/left shift pair,
    // then OR in the zero-extended subvector.
    SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
                      ZeroIdx);
    Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
    Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);

    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                         DAG.getConstant(0, dl, WideOpVT),
                         SubVec, ZeroIdx);
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  // Widen the subvector into the low lanes of the wide mask type.
  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                       Undef, SubVec, ZeroIdx);

  if (Vec.isUndef()) {
    assert(IdxVal != 0 && "Unexpected index");
    // Destination is undef: just shift the subvector into position.
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
  }

  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
    assert(IdxVal != 0 && "Unexpected index");
    // If all lanes above the inserted range are undef, a single left shift
    // positions the subvector with zeros below it.
    if (llvm::all_of(Vec->ops().slice(IdxVal + SubVecNumElems),
                     [](SDValue V) { return V.isUndef(); })) {
      SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                           DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    } else {
      // Otherwise shift left to the top then right into place so the lanes
      // above the insertion point are zeroed too.
      NumElems = WideOpVT.getVectorNumElements();
      unsigned ShiftLeft = NumElems - SubVecNumElems;
      unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
      SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                           DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
      if (ShiftRight != 0)
        SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
                             DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
    }
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
  }

  // Insertion ends exactly at the top of the original vector.
  if (IdxVal + SubVecNumElems == NumElems) {
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    if (SubVecNumElems * 2 == NumElems) {
      // The subvector is exactly the upper half: zero-extend the kept lower
      // half of Vec into the wide type, then OR in the shifted subvector.
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                        DAG.getConstant(0, dl, WideOpVT),
                        Vec, ZeroIdx);
    } else {
      // Clear the lanes of Vec from IdxVal upward with a left/right shift
      // pair in the wide type.
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                        Undef, Vec, ZeroIdx);
      NumElems = WideOpVT.getVectorNumElements();
      SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
      Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
      Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
    }
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  // General case: insert somewhere in the middle of Vec.
  NumElems = WideOpVT.getVectorNumElements();

  // Widen Vec so all shift amounts below are in wide-type lanes.
  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);

  unsigned ShiftLeft = NumElems - SubVecNumElems;
  unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;

  // When the mask width has a usable scalar integer type (anything except
  // v64i1 on a 32-bit target), clear the insertion range in Vec with an AND
  // mask and OR in the shifted subvector.
  if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) {
    // Mask0 clears exactly [IdxVal, IdxVal + SubVecNumElems).
    APInt Mask0 = APInt::getBitsSet(NumElems, IdxVal, IdxVal + SubVecNumElems);
    Mask0.flipAllBits();
    SDValue CMask0 = DAG.getConstant(Mask0, dl, MVT::getIntegerVT(NumElems));
    SDValue VMask0 = DAG.getNode(ISD::BITCAST, dl, WideOpVT, CMask0);
    Vec = DAG.getNode(ISD::AND, dl, WideOpVT, Vec, VMask0);
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
    SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);

    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  // No scalar mask available: position the subvector with a shift pair,
  // zeroing the lanes around it.
  SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                       DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
  SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
                       DAG.getTargetConstant(ShiftRight, dl, MVT::i8));

  // Isolate the lanes of Vec below the insertion point.
  unsigned LowShift = NumElems - IdxVal;
  SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec,
                            DAG.getTargetConstant(LowShift, dl, MVT::i8));
  Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low,
                    DAG.getTargetConstant(LowShift, dl, MVT::i8));

  // Isolate the lanes of Vec above the inserted range.
  unsigned HighShift = IdxVal + SubVecNumElems;
  SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
                             DAG.getTargetConstant(HighShift, dl, MVT::i8));
  High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
                     DAG.getTargetConstant(HighShift, dl, MVT::i8));

  // Combine the kept low/high lanes with the positioned subvector.
  Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
  SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec);

  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
6523 | |
6524 | static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, |
6525 | const SDLoc &dl) { |
6526 | assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch"); |
6527 | EVT SubVT = V1.getValueType(); |
6528 | EVT SubSVT = SubVT.getScalarType(); |
6529 | unsigned SubNumElts = SubVT.getVectorNumElements(); |
6530 | unsigned SubVectorWidth = SubVT.getSizeInBits(); |
6531 | EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts); |
6532 | SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth); |
6533 | return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth); |
6534 | } |
6535 | |
6536 | |
6537 | |
6538 | |
6539 | static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { |
6540 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && |
6541 | "Expected a 128/256/512-bit vector type"); |
6542 | |
6543 | APInt Ones = APInt::getAllOnesValue(32); |
6544 | unsigned NumElts = VT.getSizeInBits() / 32; |
6545 | SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); |
6546 | return DAG.getBitcast(VT, Vec); |
6547 | } |
6548 | |
6549 | |
6550 | static unsigned getOpcode_EXTEND(unsigned Opcode) { |
6551 | switch (Opcode) { |
6552 | case ISD::ANY_EXTEND: |
6553 | case ISD::ANY_EXTEND_VECTOR_INREG: |
6554 | return ISD::ANY_EXTEND; |
6555 | case ISD::ZERO_EXTEND: |
6556 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
6557 | return ISD::ZERO_EXTEND; |
6558 | case ISD::SIGN_EXTEND: |
6559 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
6560 | return ISD::SIGN_EXTEND; |
6561 | } |
6562 | llvm_unreachable("Unknown opcode"); |
6563 | } |
6564 | |
6565 | |
6566 | static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) { |
6567 | switch (Opcode) { |
6568 | case ISD::ANY_EXTEND: |
6569 | case ISD::ANY_EXTEND_VECTOR_INREG: |
6570 | return ISD::ANY_EXTEND_VECTOR_INREG; |
6571 | case ISD::ZERO_EXTEND: |
6572 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
6573 | return ISD::ZERO_EXTEND_VECTOR_INREG; |
6574 | case ISD::SIGN_EXTEND: |
6575 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
6576 | return ISD::SIGN_EXTEND_VECTOR_INREG; |
6577 | } |
6578 | llvm_unreachable("Unknown opcode"); |
6579 | } |
6580 | |
/// Create an extension of In to VT using Opcode (ANY/SIGN/ZERO_EXTEND),
/// switching to the *_VECTOR_INREG form when only the low input elements are
/// being extended.
static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,
                                      SDValue In, SelectionDAG &DAG) {
  EVT InVT = In.getValueType();
  assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
  assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
          ISD::ZERO_EXTEND == Opcode) &&
         "Unknown extension opcode");

  // For same-sized wide inputs, only the low subvector actually feeds the
  // extension - extract it (never narrower than 128 bits).
  if (InVT.getSizeInBits() > 128) {
    assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
           "Expected VTs to be the same size!");
    unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
    In = extractSubVector(In, 0, DAG, DL,
                          std::max(128U, (unsigned)VT.getSizeInBits() / Scale));
    InVT = In.getValueType();
  }

  // Element counts differ: extend only the low input elements, so use the
  // *_VECTOR_INREG variant of the opcode.
  if (VT.getVectorNumElements() != InVT.getVectorNumElements())
    Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);

  return DAG.getNode(Opcode, DL, VT, In);
}
6605 | |
6606 | |
6607 | |
6608 | |
/// Attempt to match V as a bitwise NOT, returning the value being inverted,
/// or an empty SDValue if no match.
static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
  V = peekThroughBitcasts(V);
  // Match not(x) -> (xor x, -1).
  if (V.getOpcode() == ISD::XOR &&
      ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()))
    return V.getOperand(0);
  // Match not(extract_subvector(x, i)) -> extract_subvector(not(x), i).
  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
    if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
      Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
                         Not, V.getOperand(1));
    }
  }
  // Match not(concat(x, y)) -> concat(not(x), not(y)): every concatenated
  // piece must itself match as a NOT.
  SmallVector<SDValue, 2> CatOps;
  if (collectConcatOps(V.getNode(), CatOps)) {
    for (SDValue &CatOp : CatOps) {
      SDValue NotCat = IsNOT(CatOp, DAG);
      if (!NotCat) return SDValue();
      CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
    }
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
  }
  return SDValue();
}
6633 | |
6634 | void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, |
6635 | bool Lo, bool Unary) { |
6636 | assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 && |
6637 | "Illegal vector type to unpack"); |
6638 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
6639 | int NumElts = VT.getVectorNumElements(); |
6640 | int NumEltsInLane = 128 / VT.getScalarSizeInBits(); |
6641 | for (int i = 0; i < NumElts; ++i) { |
6642 | unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; |
6643 | int Pos = (i % NumEltsInLane) / 2 + LaneStart; |
6644 | Pos += (Unary ? 0 : NumElts * (i % 2)); |
6645 | Pos += (Lo ? 0 : NumEltsInLane / 2); |
6646 | Mask.push_back(Pos); |
6647 | } |
6648 | } |
6649 | |
6650 | |
6651 | |
6652 | |
6653 | |
6654 | void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, |
6655 | bool Lo) { |
6656 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
6657 | int NumElts = VT.getVectorNumElements(); |
6658 | for (int i = 0; i < NumElts; ++i) { |
6659 | int Pos = i / 2; |
6660 | Pos += (Lo ? 0 : NumElts / 2); |
6661 | Mask.push_back(Pos); |
6662 | } |
6663 | } |
6664 | |
6665 | |
6666 | static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT, |
6667 | SDValue V1, SDValue V2) { |
6668 | SmallVector<int, 8> Mask; |
6669 | createUnpackShuffleMask(VT, Mask, true, false); |
6670 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
6671 | } |
6672 | |
6673 | |
6674 | static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT, |
6675 | SDValue V1, SDValue V2) { |
6676 | SmallVector<int, 8> Mask; |
6677 | createUnpackShuffleMask(VT, Mask, false, false); |
6678 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
6679 | } |
6680 | |
6681 | |
6682 | |
6683 | |
6684 | |
/// Return a shuffle whose result has element 0 of V2 at position Idx, with
/// every other lane taken from a zero vector (IsZero) or left undef.
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
                                           bool IsZero,
                                           const X86Subtarget &Subtarget,
                                           SelectionDAG &DAG) {
  MVT VT = V2.getSimpleValueType();
  SDValue V1 = IsZero
    ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
  int NumElems = VT.getVectorNumElements();
  SmallVector<int, 16> MaskVec(NumElems);
  for (int i = 0; i != NumElems; ++i)
    // Mask index NumElems selects element 0 of the second operand (V2);
    // every other lane picks from the zero/undef first operand.
    MaskVec[i] = (i == Idx) ? NumElems : i;
  return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
6699 | |
6700 | static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) { |
6701 | if (Ptr.getOpcode() == X86ISD::Wrapper || |
6702 | Ptr.getOpcode() == X86ISD::WrapperRIP) |
6703 | Ptr = Ptr.getOperand(0); |
6704 | |
6705 | auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); |
6706 | if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0) |
6707 | return nullptr; |
6708 | |
6709 | return CNode->getConstVal(); |
6710 | } |
6711 | |
6712 | static const Constant *getTargetConstantFromNode(LoadSDNode *Load) { |
6713 | if (!Load || !ISD::isNormalLoad(Load)) |
6714 | return nullptr; |
6715 | return getTargetConstantFromBasePtr(Load->getBasePtr()); |
6716 | } |
6717 | |
6718 | static const Constant *getTargetConstantFromNode(SDValue Op) { |
6719 | Op = peekThroughBitcasts(Op); |
6720 | return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)); |
6721 | } |
6722 | |
/// TargetLowering callback: return the IR constant behind a constant-pool
/// load, forwarding to the file-local helper.
const Constant *
X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
  assert(LD && "Unexpected null LoadSDNode");
  return getTargetConstantFromNode(LD);
}
6728 | |
6729 | |
6730 | static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, |
6731 | APInt &UndefElts, |
6732 | SmallVectorImpl<APInt> &EltBits, |
6733 | bool AllowWholeUndefs = true, |
6734 | bool AllowPartialUndefs = true) { |
6735 | assert(EltBits.empty() && "Expected an empty EltBits vector"); |
6736 | |
6737 | Op = peekThroughBitcasts(Op); |
6738 | |
6739 | EVT VT = Op.getValueType(); |
6740 | unsigned SizeInBits = VT.getSizeInBits(); |
6741 | assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); |
6742 | unsigned NumElts = SizeInBits / EltSizeInBits; |
6743 | |
6744 | |
6745 | auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) { |
6746 | unsigned NumSrcElts = UndefSrcElts.getBitWidth(); |
6747 | unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth(); |
6748 | assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits && |
6749 | "Constant bit sizes don't match"); |
6750 | |
6751 | |
6752 | bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; |
6753 | if (UndefSrcElts.getBoolValue() && !AllowUndefs) |
6754 | return false; |
6755 | |
6756 | |
6757 | if (NumSrcElts == NumElts) { |
6758 | UndefElts = UndefSrcElts; |
6759 | EltBits.assign(SrcEltBits.begin(), SrcEltBits.end()); |
6760 | return true; |
6761 | } |
6762 | |
6763 | |
6764 | APInt UndefBits(SizeInBits, 0); |
6765 | APInt MaskBits(SizeInBits, 0); |
6766 | |
6767 | for (unsigned i = 0; i != NumSrcElts; ++i) { |
6768 | unsigned BitOffset = i * SrcEltSizeInBits; |
6769 | if (UndefSrcElts[i]) |
6770 | UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); |
6771 | MaskBits.insertBits(SrcEltBits[i], BitOffset); |
6772 | } |
6773 | |
6774 | |
6775 | UndefElts = APInt(NumElts, 0); |
6776 | EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); |
6777 | |
6778 | for (unsigned i = 0; i != NumElts; ++i) { |
6779 | unsigned BitOffset = i * EltSizeInBits; |
6780 | APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset); |
6781 | |
6782 | |
6783 | if (UndefEltBits.isAllOnesValue()) { |
6784 | if (!AllowWholeUndefs) |
6785 | return false; |
6786 | UndefElts.setBit(i); |
6787 | continue; |
6788 | } |
6789 | |
6790 | |
6791 | |
6792 | if (UndefEltBits.getBoolValue() && !AllowPartialUndefs) |
6793 | return false; |
6794 | |
6795 | EltBits[i] = MaskBits.extractBits(EltSizeInBits, BitOffset); |
6796 | } |
6797 | return true; |
6798 | }; |
6799 | |
6800 | |
6801 | auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs, |
6802 | unsigned UndefBitIndex) { |
6803 | if (!Cst) |
6804 | return false; |
6805 | if (isa<UndefValue>(Cst)) { |
6806 | Undefs.setBit(UndefBitIndex); |
6807 | return true; |
6808 | } |
6809 | if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { |
6810 | Mask = CInt->getValue(); |
6811 | return true; |
6812 | } |
6813 | if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { |
6814 | Mask = CFP->getValueAPF().bitcastToAPInt(); |
6815 | return true; |
6816 | } |
6817 | return false; |
6818 | }; |
6819 | |
6820 | |
6821 | if (Op.isUndef()) { |
6822 | APInt UndefSrcElts = APInt::getAllOnesValue(NumElts); |
6823 | SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0)); |
6824 | return CastBitData(UndefSrcElts, SrcEltBits); |
6825 | } |
6826 | |
6827 | |
6828 | if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) { |
6829 | APInt UndefSrcElts = APInt::getNullValue(1); |
6830 | SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue()); |
6831 | return CastBitData(UndefSrcElts, SrcEltBits); |
6832 | } |
6833 | if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) { |
6834 | APInt UndefSrcElts = APInt::getNullValue(1); |
6835 | APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); |
6836 | SmallVector<APInt, 64> SrcEltBits(1, RawBits); |
6837 | return CastBitData(UndefSrcElts, SrcEltBits); |
6838 | } |
6839 | |
6840 | |
6841 | if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { |
6842 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6843 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6844 | |
6845 | APInt UndefSrcElts(NumSrcElts, 0); |
6846 | SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); |
6847 | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
6848 | const SDValue &Src = Op.getOperand(i); |
6849 | if (Src.isUndef()) { |
6850 | UndefSrcElts.setBit(i); |
6851 | continue; |
6852 | } |
6853 | auto *Cst = cast<ConstantSDNode>(Src); |
6854 | SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); |
6855 | } |
6856 | return CastBitData(UndefSrcElts, SrcEltBits); |
6857 | } |
6858 | if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) { |
6859 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6860 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6861 | |
6862 | APInt UndefSrcElts(NumSrcElts, 0); |
6863 | SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); |
6864 | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
6865 | const SDValue &Src = Op.getOperand(i); |
6866 | if (Src.isUndef()) { |
6867 | UndefSrcElts.setBit(i); |
6868 | continue; |
6869 | } |
6870 | auto *Cst = cast<ConstantFPSDNode>(Src); |
6871 | APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); |
6872 | SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits); |
6873 | } |
6874 | return CastBitData(UndefSrcElts, SrcEltBits); |
6875 | } |
6876 | |
6877 | |
6878 | if (auto *Cst = getTargetConstantFromNode(Op)) { |
6879 | Type *CstTy = Cst->getType(); |
6880 | unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); |
6881 | if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0) |
6882 | return false; |
6883 | |
6884 | unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); |
6885 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6886 | |
6887 | APInt UndefSrcElts(NumSrcElts, 0); |
6888 | SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); |
6889 | for (unsigned i = 0; i != NumSrcElts; ++i) |
6890 | if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i], |
6891 | UndefSrcElts, i)) |
6892 | return false; |
6893 | |
6894 | return CastBitData(UndefSrcElts, SrcEltBits); |
6895 | } |
6896 | |
6897 | |
6898 | if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD && |
6899 | EltSizeInBits <= VT.getScalarSizeInBits()) { |
6900 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
6901 | if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits()) |
6902 | return false; |
6903 | |
6904 | SDValue Ptr = MemIntr->getBasePtr(); |
6905 | if (const Constant *C = getTargetConstantFromBasePtr(Ptr)) { |
6906 | unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits(); |
6907 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6908 | |
6909 | APInt UndefSrcElts(NumSrcElts, 0); |
6910 | SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); |
6911 | if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) { |
6912 | if (UndefSrcElts[0]) |
6913 | UndefSrcElts.setBits(0, NumSrcElts); |
6914 | SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); |
6915 | return CastBitData(UndefSrcElts, SrcEltBits); |
6916 | } |
6917 | } |
6918 | } |
6919 | |
6920 | |
6921 | if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { |
6922 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
6923 | SDValue Ptr = MemIntr->getBasePtr(); |
6924 | |
6925 | |
6926 | if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) { |
6927 | Type *CstTy = Cst->getType(); |
6928 | unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); |
6929 | unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits(); |
6930 | if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 || |
6931 | (SizeInBits % SubVecSizeInBits) != 0) |
6932 | return false; |
6933 | unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); |
6934 | unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits; |
6935 | unsigned NumSubVecs = SizeInBits / SubVecSizeInBits; |
6936 | APInt UndefSubElts(NumSubElts, 0); |
6937 | SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs, |
6938 | APInt(CstEltSizeInBits, 0)); |
6939 | for (unsigned i = 0; i != NumSubElts; ++i) { |
6940 | if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i], |
6941 | UndefSubElts, i)) |
6942 | return false; |
6943 | for (unsigned j = 1; j != NumSubVecs; ++j) |
6944 | SubEltBits[i + (j * NumSubElts)] = SubEltBits[i]; |
6945 | } |
6946 | UndefSubElts = APInt::getSplat(NumSubVecs * UndefSubElts.getBitWidth(), |
6947 | UndefSubElts); |
6948 | return CastBitData(UndefSubElts, SubEltBits); |
6949 | } |
6950 | } |
6951 | |
6952 | |
6953 | if (Op.getOpcode() == X86ISD::VZEXT_MOVL && |
6954 | Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && |
6955 | isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) { |
6956 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6957 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6958 | |
6959 | APInt UndefSrcElts(NumSrcElts, 0); |
6960 | SmallVector<APInt, 64> SrcEltBits; |
6961 | auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0)); |
6962 | SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); |
6963 | SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); |
6964 | return CastBitData(UndefSrcElts, SrcEltBits); |
6965 | } |
6966 | |
6967 | |
6968 | if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) { |
6969 | |
6970 | |
6971 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6972 | bool AllowUndefs = EltSizeInBits >= SrcEltSizeInBits; |
6973 | |
6974 | APInt UndefSrcElts, UndefSubElts; |
6975 | SmallVector<APInt, 32> EltSrcBits, EltSubBits; |
6976 | if (getTargetConstantBitsFromNode(Op.getOperand(1), SrcEltSizeInBits, |
6977 | UndefSubElts, EltSubBits, |
6978 | AllowWholeUndefs && AllowUndefs, |
6979 | AllowPartialUndefs && AllowUndefs) && |
6980 | getTargetConstantBitsFromNode(Op.getOperand(0), SrcEltSizeInBits, |
6981 | UndefSrcElts, EltSrcBits, |
6982 | AllowWholeUndefs && AllowUndefs, |
6983 | AllowPartialUndefs && AllowUndefs)) { |
6984 | unsigned BaseIdx = Op.getConstantOperandVal(2); |
6985 | UndefSrcElts.insertBits(UndefSubElts, BaseIdx); |
6986 | for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i) |
6987 | EltSrcBits[BaseIdx + i] = EltSubBits[i]; |
6988 | return CastBitData(UndefSrcElts, EltSrcBits); |
6989 | } |
6990 | } |
6991 | |
6992 | |
6993 | if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) { |
6994 | |
6995 | if (EltSizeInBits != VT.getScalarSizeInBits()) |
6996 | return false; |
6997 | |
6998 | if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, |
6999 | UndefElts, EltBits, AllowWholeUndefs, |
7000 | AllowPartialUndefs)) { |
7001 | EVT SrcVT = Op.getOperand(0).getValueType(); |
7002 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
7003 | unsigned NumSubElts = VT.getVectorNumElements(); |
7004 | unsigned BaseIdx = Op.getConstantOperandVal(1); |
7005 | UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx); |
7006 | if ((BaseIdx + NumSubElts) != NumSrcElts) |
7007 | EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end()); |
7008 | if (BaseIdx != 0) |
7009 | EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx); |
7010 | return true; |
7011 | } |
7012 | } |
7013 | |
7014 | |
7015 | if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) { |
7016 | |
7017 | if (EltSizeInBits != VT.getScalarSizeInBits()) |
7018 | return false; |
7019 | |
7020 | ArrayRef<int> Mask = SVN->getMask(); |
7021 | if ((!AllowWholeUndefs || !AllowPartialUndefs) && |
7022 | llvm::any_of(Mask, [](int M) { return M < 0; })) |
7023 | return false; |
7024 | |
7025 | APInt UndefElts0, UndefElts1; |
7026 | SmallVector<APInt, 32> EltBits0, EltBits1; |
7027 | if (isAnyInRange(Mask, 0, NumElts) && |
7028 | !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, |
7029 | UndefElts0, EltBits0, AllowWholeUndefs, |
7030 | AllowPartialUndefs)) |
7031 | return false; |
7032 | if (isAnyInRange(Mask, NumElts, 2 * NumElts) && |
7033 | !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits, |
7034 | UndefElts1, EltBits1, AllowWholeUndefs, |
7035 | AllowPartialUndefs)) |
7036 | return false; |
7037 | |
7038 | UndefElts = APInt::getNullValue(NumElts); |
7039 | for (int i = 0; i != (int)NumElts; ++i) { |
7040 | int M = Mask[i]; |
7041 | if (M < 0) { |
7042 | UndefElts.setBit(i); |
7043 | EltBits.push_back(APInt::getNullValue(EltSizeInBits)); |
7044 | } else if (M < (int)NumElts) { |
7045 | if (UndefElts0[M]) |
7046 | UndefElts.setBit(i); |
7047 | EltBits.push_back(EltBits0[M]); |
7048 | } else { |
7049 | if (UndefElts1[M - NumElts]) |
7050 | UndefElts.setBit(i); |
7051 | EltBits.push_back(EltBits1[M - NumElts]); |
7052 | } |
7053 | } |
7054 | return true; |
7055 | } |
7056 | |
7057 | return false; |
7058 | } |
7059 | |
7060 | namespace llvm { |
7061 | namespace X86 { |
7062 | bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) { |
7063 | APInt UndefElts; |
7064 | SmallVector<APInt, 16> EltBits; |
7065 | if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), |
7066 | UndefElts, EltBits, true, |
7067 | AllowPartialUndefs)) { |
7068 | int SplatIndex = -1; |
7069 | for (int i = 0, e = EltBits.size(); i != e; ++i) { |
7070 | if (UndefElts[i]) |
7071 | continue; |
7072 | if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) { |
7073 | SplatIndex = -1; |
7074 | break; |
7075 | } |
7076 | SplatIndex = i; |
7077 | } |
7078 | if (0 <= SplatIndex) { |
7079 | SplatVal = EltBits[SplatIndex]; |
7080 | return true; |
7081 | } |
7082 | } |
7083 | |
7084 | return false; |
7085 | } |
7086 | } |
7087 | } |
7088 | |
7089 | static bool getTargetShuffleMaskIndices(SDValue MaskNode, |
7090 | unsigned MaskEltSizeInBits, |
7091 | SmallVectorImpl<uint64_t> &RawMask, |
7092 | APInt &UndefElts) { |
7093 | |
7094 | SmallVector<APInt, 64> EltBits; |
7095 | if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts, |
7096 | EltBits, true, |
7097 | false)) |
7098 | return false; |
7099 | |
7100 | |
7101 | for (const APInt &Elt : EltBits) |
7102 | RawMask.push_back(Elt.getZExtValue()); |
7103 | |
7104 | return true; |
7105 | } |
7106 | |
7107 | |
7108 | |
7109 | |
7110 | static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, |
7111 | bool Unary, unsigned NumStages = 1) { |
7112 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
7113 | unsigned NumElts = VT.getVectorNumElements(); |
7114 | unsigned NumLanes = VT.getSizeInBits() / 128; |
7115 | unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits(); |
7116 | unsigned Offset = Unary ? 0 : NumElts; |
7117 | unsigned Repetitions = 1u << (NumStages - 1); |
7118 | unsigned Increment = 1u << NumStages; |
7119 | assert((NumEltsPerLane >> NumStages) > 0 && "Illegal packing compaction"); |
7120 | |
7121 | for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { |
7122 | for (unsigned Stage = 0; Stage != Repetitions; ++Stage) { |
7123 | for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment) |
7124 | Mask.push_back(Elt + (Lane * NumEltsPerLane)); |
7125 | for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment) |
7126 | Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset); |
7127 | } |
7128 | } |
7129 | } |
7130 | |
7131 | |
7132 | static void getPackDemandedElts(EVT VT, const APInt &DemandedElts, |
7133 | APInt &DemandedLHS, APInt &DemandedRHS) { |
7134 | int NumLanes = VT.getSizeInBits() / 128; |
7135 | int NumElts = DemandedElts.getBitWidth(); |
7136 | int NumInnerElts = NumElts / 2; |
7137 | int NumEltsPerLane = NumElts / NumLanes; |
7138 | int NumInnerEltsPerLane = NumInnerElts / NumLanes; |
7139 | |
7140 | DemandedLHS = APInt::getNullValue(NumInnerElts); |
7141 | DemandedRHS = APInt::getNullValue(NumInnerElts); |
7142 | |
7143 | |
7144 | for (int Lane = 0; Lane != NumLanes; ++Lane) { |
7145 | for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) { |
7146 | int OuterIdx = (Lane * NumEltsPerLane) + Elt; |
7147 | int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt; |
7148 | if (DemandedElts[OuterIdx]) |
7149 | DemandedLHS.setBit(InnerIdx); |
7150 | if (DemandedElts[OuterIdx + NumInnerEltsPerLane]) |
7151 | DemandedRHS.setBit(InnerIdx); |
7152 | } |
7153 | } |
7154 | } |
7155 | |
7156 | |
7157 | static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts, |
7158 | APInt &DemandedLHS, APInt &DemandedRHS) { |
7159 | int NumLanes = VT.getSizeInBits() / 128; |
7160 | int NumElts = DemandedElts.getBitWidth(); |
7161 | int NumEltsPerLane = NumElts / NumLanes; |
7162 | int HalfEltsPerLane = NumEltsPerLane / 2; |
7163 | |
7164 | DemandedLHS = APInt::getNullValue(NumElts); |
7165 | DemandedRHS = APInt::getNullValue(NumElts); |
7166 | |
7167 | |
7168 | for (int Idx = 0; Idx != NumElts; ++Idx) { |
7169 | if (!DemandedElts[Idx]) |
7170 | continue; |
7171 | int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane; |
7172 | int LocalIdx = Idx % NumEltsPerLane; |
7173 | if (LocalIdx < HalfEltsPerLane) { |
7174 | DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0); |
7175 | DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1); |
7176 | } else { |
7177 | LocalIdx -= HalfEltsPerLane; |
7178 | DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0); |
7179 | DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1); |
7180 | } |
7181 | } |
7182 | } |
7183 | |
7184 | |
7185 | |
7186 | |
7187 | |
7188 | |
7189 | |
7190 | |
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// On success, returns the decoded mask in \p Mask, the shuffle source
/// operands in \p Ops, and returns true. \p IsUnary is set when only one
/// source is used — including "fake unary" shuffles where both operands are
/// the same node, in which case the mask is remapped so every index points
/// into the first input.
/// Returns false if the mask cannot be decoded (e.g. non-constant control
/// operand), if decoding produced an empty mask, or if the mask contains
/// zero sentinels and \p AllowSentinelZero is false.
static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
                                 SmallVectorImpl<SDValue> &Ops,
                                 SmallVectorImpl<int> &Mask, bool &IsUnary) {
  unsigned NumElems = VT.getVectorNumElements();
  unsigned MaskEltSize = VT.getScalarSizeInBits();
  SmallVector<uint64_t, 32> RawMask;
  APInt RawUndefs;
  uint64_t ImmN;

  assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
  assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector");

  IsUnary = false;
  // Set alongside IsUnary when the "unary" shuffle actually has two (equal)
  // operands; the mask remap at the bottom depends on this distinction.
  bool IsFakeUnary = false;
  switch (N->getOpcode()) {
  case X86ISD::BLENDI:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodeBLENDMask(NumElems, ImmN, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::SHUFP:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodeSHUFPMask(NumElems, MaskEltSize, ImmN, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::INSERTPS:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodeINSERTPSMask(ImmN, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::EXTRQI:
    // Only decodable when the bit length/index operands are constants;
    // otherwise Mask stays empty and we fail below.
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    if (isa<ConstantSDNode>(N->getOperand(1)) &&
        isa<ConstantSDNode>(N->getOperand(2))) {
      int BitLen = N->getConstantOperandVal(1);
      int BitIdx = N->getConstantOperandVal(2);
      DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
      IsUnary = true;
    }
    break;
  case X86ISD::INSERTQI:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    if (isa<ConstantSDNode>(N->getOperand(2)) &&
        isa<ConstantSDNode>(N->getOperand(3))) {
      int BitLen = N->getConstantOperandVal(2);
      int BitIdx = N->getConstantOperandVal(3);
      DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
      IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    }
    break;
  case X86ISD::UNPCKH:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    DecodeUNPCKHMask(NumElems, MaskEltSize, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::UNPCKL:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    DecodeUNPCKLMask(NumElems, MaskEltSize, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::MOVHLPS:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    DecodeMOVHLPSMask(NumElems, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::MOVLHPS:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    DecodeMOVLHPSMask(NumElems, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::VALIGN:
    assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
           "Only 32-bit and 64-bit elements are supported!");
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodeVALIGNMask(NumElems, ImmN, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    // NOTE: operands are pushed in (1, 0) order — presumably the decoded
    // mask indexes the sources in that order.
    Ops.push_back(N->getOperand(1));
    Ops.push_back(N->getOperand(0));
    break;
  case X86ISD::PALIGNR:
    assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodePALIGNRMask(NumElems, ImmN, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    // NOTE: operands pushed in (1, 0) order, as for VALIGN above.
    Ops.push_back(N->getOperand(1));
    Ops.push_back(N->getOperand(0));
    break;
  case X86ISD::VSHLDQ:
    assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodePSLLDQMask(NumElems, ImmN, Mask);
    IsUnary = true;
    break;
  case X86ISD::VSRLDQ:
    assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodePSRLDQMask(NumElems, ImmN, Mask);
    IsUnary = true;
    break;
  case X86ISD::PSHUFD:
  case X86ISD::VPERMILPI:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodePSHUFMask(NumElems, MaskEltSize, ImmN, Mask);
    IsUnary = true;
    break;
  case X86ISD::PSHUFHW:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodePSHUFHWMask(NumElems, ImmN, Mask);
    IsUnary = true;
    break;
  case X86ISD::PSHUFLW:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodePSHUFLWMask(NumElems, ImmN, Mask);
    IsUnary = true;
    break;
  case X86ISD::VZEXT_MOVL:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    DecodeZeroMoveLowMask(NumElems, Mask);
    IsUnary = true;
    break;
  case X86ISD::VBROADCAST:
    // Only decode broadcasts whose source operand already has the result
    // vector type; otherwise decoding is not attempted.
    if (N->getOperand(0).getValueType() == VT) {
      DecodeVectorBroadcast(NumElems, Mask);
      IsUnary = true;
      break;
    }
    return false;
  case X86ISD::VPERMILPV: {
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    IsUnary = true;
    SDValue MaskNode = N->getOperand(1);
    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
                                    RawUndefs)) {
      DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask);
      break;
    }
    return false;
  }
  case X86ISD::PSHUFB: {
    assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = true;
    SDValue MaskNode = N->getOperand(1);
    // PSHUFB masks are always byte-sized.
    if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
      DecodePSHUFBMask(RawMask, RawUndefs, Mask);
      break;
    }
    return false;
  }
  case X86ISD::VPERMI:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodeVPERMMask(NumElems, ImmN, Mask);
    IsUnary = true;
    break;
  case X86ISD::MOVSS:
  case X86ISD::MOVSD:
  case X86ISD::MOVSH:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    DecodeScalarMoveMask(NumElems, false, Mask);
    break;
  case X86ISD::VPERM2X128:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    DecodeVPERM2X128Mask(NumElems, ImmN, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::SHUF128:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
    decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize, ImmN, Mask);
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    break;
  case X86ISD::MOVSLDUP:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    DecodeMOVSLDUPMask(NumElems, Mask);
    IsUnary = true;
    break;
  case X86ISD::MOVSHDUP:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    DecodeMOVSHDUPMask(NumElems, Mask);
    IsUnary = true;
    break;
  case X86ISD::MOVDDUP:
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    DecodeMOVDDUPMask(NumElems, Mask);
    IsUnary = true;
    break;
  case X86ISD::VPERMIL2: {
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    // Requires both the selector vector (operand 2) and the immediate
    // control (operand 3) to be constant.
    SDValue MaskNode = N->getOperand(2);
    SDValue CtrlNode = N->getOperand(3);
    if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
      unsigned CtrlImm = CtrlOp->getZExtValue();
      if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
                                      RawUndefs)) {
        DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs,
                            Mask);
        break;
      }
    }
    return false;
  }
  case X86ISD::VPPERM: {
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
    SDValue MaskNode = N->getOperand(2);
    if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
      DecodeVPPERMMask(RawMask, RawUndefs, Mask);
      break;
    }
    return false;
  }
  case X86ISD::VPERMV: {
    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
    IsUnary = true;
    // Unlike most shuffles, the mask is operand 0 and the single source is
    // operand 1 — add the source to Ops explicitly.
    Ops.push_back(N->getOperand(1));
    SDValue MaskNode = N->getOperand(0);
    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
                                    RawUndefs)) {
      DecodeVPERMVMask(RawMask, RawUndefs, Mask);
      break;
    }
    return false;
  }
  case X86ISD::VPERMV3: {
    assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
    assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
    // The mask is the middle operand (1); the sources are operands 0 and 2.
    Ops.push_back(N->getOperand(0));
    Ops.push_back(N->getOperand(2));
    SDValue MaskNode = N->getOperand(1);
    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
                                    RawUndefs)) {
      DecodeVPERMV3Mask(RawMask, RawUndefs, Mask);
      break;
    }
    return false;
  }
  default: llvm_unreachable("unknown target shuffle node");
  }

  // An empty mask means the decode failed (e.g. non-constant immediates).
  if (Mask.empty())
    return false;

  // Reject masks containing zero sentinels when the caller disallows them.
  if (!AllowSentinelZero && isAnyZero(Mask))
    return false;

  // If we have a fake unary shuffle, the shuffle mask is spread across two
  // inputs that are actually the same node. Re-map the mask to always point
  // into the first input.
  if (IsFakeUnary)
    for (int &M : Mask)
      if (M >= (int)Mask.size())
        M -= Mask.size();

  // If we didn't already add operands in the opcode-specific code, default to
  // adding 1 or 2 operands starting at Operand(0).
  if (Ops.empty()) {
    Ops.push_back(N->getOperand(0));
    if (!IsUnary || IsFakeUnary)
      Ops.push_back(N->getOperand(1));
  }

  return true;
}
7491 | |
7492 | |
7493 | static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, |
7494 | SmallVectorImpl<SDValue> &Ops, |
7495 | SmallVectorImpl<int> &Mask) { |
7496 | bool IsUnary; |
7497 | return getTargetShuffleMask(N, VT, AllowSentinelZero, Ops, Mask, IsUnary); |
7498 | } |
7499 | |
7500 | |
7501 | |
7502 | |
7503 | |
7504 | |
7505 | |
7506 | |
7507 | |
/// For each element of the shuffle \p Mask, record in \p KnownUndef whether
/// it is known to be undef, and in \p KnownZero whether it is known to
/// produce a zero value, by inspecting the (bitcast-peeled) shuffle inputs
/// \p V1 and \p V2. Elements that cannot be proven either way are left
/// unset in both masks.
static void computeZeroableShuffleElements(ArrayRef<int> Mask,
                                           SDValue V1, SDValue V2,
                                           APInt &KnownUndef, APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getNullValue(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  // Element size as seen by the shuffle, which (after the bitcast peek) may
  // differ from the source BUILD_VECTOR's element size.
  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    // Sentinel (negative) mask values are undef.
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    // Elements pulled from an all-zeros input are zero.
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }

    // Select the source vector and normalize the index into it.
    SDValue V = M < Size ? V1 : V2;
    M %= Size;

    // Only BUILD_VECTOR sources can be analysed element-by-element.
    if (V.getOpcode() != ISD::BUILD_VECTOR)
      continue;

    // Source has fewer (wider) elements than the mask: each source operand
    // covers Scale mask elements, so extract the relevant sub-chunk of the
    // constant's bits.
    if ((Size % V.getNumOperands()) == 0) {
      int Scale = Size / V->getNumOperands();
      SDValue Op = V.getOperand(M / Scale);
      if (Op.isUndef())
        KnownUndef.setBit(i);
      if (X86::isZeroNode(Op))
        KnownZero.setBit(i);
      else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
        APInt Val = Cst->getAPIntValue();
        Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
        if (Val == 0)
          KnownZero.setBit(i);
      } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
        APInt Val = Cst->getValueAPF().bitcastToAPInt();
        Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
        if (Val == 0)
          KnownZero.setBit(i);
      }
      continue;
    }

    // Source has more (narrower) elements than the mask: a mask element is
    // only undef/zero if all Scale source operands it spans are.
    if ((V.getNumOperands() % Size) == 0) {
      int Scale = V->getNumOperands() / Size;
      bool AllUndef = true;
      bool AllZero = true;
      for (int j = 0; j < Scale; ++j) {
        SDValue Op = V.getOperand((M * Scale) + j);
        AllUndef &= Op.isUndef();
        AllZero &= X86::isZeroNode(Op);
      }
      if (AllUndef)
        KnownUndef.setBit(i);
      if (AllZero)
        KnownZero.setBit(i);
      continue;
    }
  }
}
7586 | |
7587 | |
7588 | |
7589 | |
7590 | |
/// Decode a target shuffle node \p N into its \p Mask and source \p Ops
/// (sentinel zeros allowed), and compute which mask elements are known to be
/// undef (\p KnownUndef) or zero (\p KnownZero) from the inputs. Returns
/// false if \p N is not a target shuffle or its mask cannot be decoded.
static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
                                         SmallVectorImpl<SDValue> &Ops,
                                         APInt &KnownUndef, APInt &KnownZero) {
  bool IsUnary;
  if (!isTargetShuffle(N.getOpcode()))
    return false;

  MVT VT = N.getSimpleValueType();
  if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
    return false;

  int Size = Mask.size();
  SDValue V1 = Ops[0];
  // A unary shuffle only provides one operand; treat it as both sources.
  SDValue V2 = IsUnary ? V1 : Ops[1];
  KnownUndef = KnownZero = APInt::getNullValue(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  assert((VT.getSizeInBits() % Size) == 0 &&
         "Illegal split of shuffle value type");
  unsigned EltSizeInBits = VT.getSizeInBits() / Size;

  // Try to extract per-element constant bits from each source up front.
  APInt UndefSrcElts[2];
  SmallVector<APInt, 32> SrcEltBits[2];
  bool IsSrcConstant[2] = {
      getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
                                    SrcEltBits[0], true, false),
      getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
                                    SrcEltBits[1], true, false)};

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];

    // Already decoded as an undef/zero sentinel.
    if (M < 0) {
      assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!");
      if (SM_SentinelUndef == M)
        KnownUndef.setBit(i);
      if (SM_SentinelZero == M)
        KnownZero.setBit(i);
      continue;
    }

    // SrcIdx (0 = V1, 1 = V2) must be computed BEFORE M is normalized.
    unsigned SrcIdx = M / Size;
    SDValue V = M < Size ? V1 : V2;
    M %= Size;

    // The whole source vector is undef.
    if (V.isUndef()) {
      KnownUndef.setBit(i);
      continue;
    }

    // SCALAR_TO_VECTOR: for non-FP types, only element 0 carries a defined
    // value, so wider elements mapping past it are undef; element 0 itself
    // is zero if the scalar is the zero node.
    if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        (Size % V.getValueType().getVectorNumElements()) == 0) {
      int Scale = Size / V.getValueType().getVectorNumElements();
      int Idx = M / Scale;
      if (Idx != 0 && !VT.isFloatingPoint())
        KnownUndef.setBit(i);
      else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
        KnownZero.setBit(i);
      continue;
    }

    // INSERT_SUBVECTOR into an undef base: elements outside the inserted
    // subvector's [Idx, Idx + NumSubElts) range are undef.
    if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
      SDValue Vec = V.getOperand(0);
      int NumVecElts = Vec.getValueType().getVectorNumElements();
      if (Vec.isUndef() && Size == NumVecElts) {
        int Idx = V.getConstantOperandVal(2);
        int NumSubElts = V.getOperand(1).getValueType().getVectorNumElements();
        if (M < Idx || (Idx + NumSubElts) <= M)
          KnownUndef.setBit(i);
      }
      continue;
    }

    // Constant source: read undef/zero directly from the extracted bits.
    if (IsSrcConstant[SrcIdx]) {
      if (UndefSrcElts[SrcIdx][M])
        KnownUndef.setBit(i);
      else if (SrcEltBits[SrcIdx][M] == 0)
        KnownZero.setBit(i);
    }
  }

  // Sanity check: the decoded mask matches the shuffle's element count.
  assert(VT.getVectorNumElements() == (unsigned)Size &&
         "Different mask size from vector size!");
  return true;
}
7689 | |
7690 | |
7691 | static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask, |
7692 | const APInt &KnownUndef, |
7693 | const APInt &KnownZero, |
7694 | bool ResolveKnownZeros= true) { |
7695 | unsigned NumElts = Mask.size(); |
7696 | assert(KnownUndef.getBitWidth() == NumElts && |
7697 | KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch"); |
7698 | |
7699 | for (unsigned i = 0; i != NumElts; ++i) { |
7700 | if (KnownUndef[i]) |
7701 | Mask[i] = SM_SentinelUndef; |
7702 | else if (ResolveKnownZeros && KnownZero[i]) |
7703 | Mask[i] = SM_SentinelZero; |
7704 | } |
7705 | } |
7706 | |
7707 | |
7708 | static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask, |
7709 | APInt &KnownUndef, |
7710 | APInt &KnownZero) { |
7711 | unsigned NumElts = Mask.size(); |
7712 | KnownUndef = KnownZero = APInt::getNullValue(NumElts); |
7713 | |
7714 | for (unsigned i = 0; i != NumElts; ++i) { |
7715 | int M = Mask[i]; |
7716 | if (SM_SentinelUndef == M) |
7717 | KnownUndef.setBit(i); |
7718 | if (SM_SentinelZero == M) |
7719 | KnownZero.setBit(i); |
7720 | } |
7721 | } |
7722 | |
7723 | |
7724 | |
// Forward declaration (defined below): resolve Op to its shuffle inputs and
// mask. Declared here because getFauxShuffleMask calls it recursively.
static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
                                   SmallVectorImpl<int> &Mask,
                                   const SelectionDAG &DAG, unsigned Depth,
                                   bool ResolveKnownElts);
7729 | |
7730 | |
7731 | |
7732 | |
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
// destination value type. On success, Ops holds the shuffle's source vectors
// and Mask the indices into them (indices in [K*W, (K+1)*W) refer to Ops[K],
// where W is the mask width), plus SM_SentinelUndef/SM_SentinelZero entries.
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
                               SmallVectorImpl<int> &Mask,
                               SmallVectorImpl<SDValue> &Ops,
                               const SelectionDAG &DAG, unsigned Depth,
                               bool ResolveKnownElts) {
  Mask.clear();
  Ops.clear();

  MVT VT = N.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumSizeInBits = VT.getSizeInBits();
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
  // Only handle byte-aligned element/vector sizes.
  if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
    return false;
  assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
  unsigned NumSizeInBytes = NumSizeInBits / 8;
  unsigned NumBytesPerElt = NumBitsPerElt / 8;

  unsigned Opcode = N.getOpcode();
  switch (Opcode) {
  case ISD::VECTOR_SHUFFLE: {
    // ISD::VECTOR_SHUFFLE isn't a "target" shuffle, so merge it in here.
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask();
    if (isUndefOrInRange(ShuffleMask, 0, 2 * NumElts)) {
      Mask.append(ShuffleMask.begin(), ShuffleMask.end());
      Ops.push_back(N.getOperand(0));
      Ops.push_back(N.getOperand(1));
      return true;
    }
    return false;
  }
  case ISD::AND:
  case X86ISD::ANDNP: {
    // Attempt to decode as a per-byte mask: each constant byte must be
    // all-ones (keep byte) or all-zeros (zero byte).
    APInt UndefElts;
    SmallVector<APInt, 32> EltBits;
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    bool IsAndN = (X86ISD::ANDNP == Opcode);
    // For ANDNP the mask operand is inverted, so an all-ones byte zeroes.
    uint64_t ZeroMask = IsAndN ? 255 : 0;
    if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
      return false;
    for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
      if (UndefElts[i]) {
        Mask.push_back(SM_SentinelUndef);
        continue;
      }
      const APInt &ByteBits = EltBits[i];
      if (ByteBits != 0 && ByteBits != 255)
        return false;
      Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
    }
    Ops.push_back(IsAndN ? N1 : N0);
    return true;
  }
  case ISD::OR: {
    // Handle OR(SHUFFLE,SHUFFLE) cases where one source is zero in every
    // lane the other uses - the OR then acts as a blend of the two inputs.
    SDValue N0 = peekThroughBitcasts(N.getOperand(0));
    SDValue N1 = peekThroughBitcasts(N.getOperand(1));
    if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
      return false;
    SmallVector<int, 64> SrcMask0, SrcMask1;
    SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
    if (!getTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1,
                                true) ||
        !getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1,
                                true))
      return false;

    // Widen both masks to a common element count before comparing lanes.
    size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
    SmallVector<int, 64> Mask0, Mask1;
    narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
    narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
    for (int i = 0; i != (int)MaskSize; ++i) {
      // Take the element from whichever side isn't known zero; fail if
      // neither side is known zero (a real OR of data, not a blend).
      if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
        Mask.push_back(SM_SentinelZero);
      else if (Mask1[i] == SM_SentinelZero)
        Mask.push_back(i);
      else if (Mask0[i] == SM_SentinelZero)
        Mask.push_back(i + MaskSize);
      else
        return false;
    }
    Ops.push_back(N0);
    Ops.push_back(N1);
    return true;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Src = N.getOperand(0);
    SDValue Sub = N.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    if (!N->isOnlyUserOf(Sub.getNode()))
      return false;
    uint64_t InsertIdx = N.getConstantOperandVal(2);
    // Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)) directly as an
    // identity mask with the inserted elements redirected to SRC1.
    if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Sub.getOperand(0).getValueType() == VT) {
      uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
      for (int i = 0; i != (int)NumElts; ++i)
        Mask.push_back(i);
      for (int i = 0; i != (int)NumSubElts; ++i)
        Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
      Ops.push_back(Src);
      Ops.push_back(Sub.getOperand(0));
      return true;
    }
    // Otherwise try to decode the inserted subvector as a shuffle itself.
    SmallVector<int, 64> SubMask;
    SmallVector<SDValue, 2> SubInputs;
    if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
                                SubMask, DAG, Depth + 1, ResolveKnownElts))
      return false;

    // Subvector shuffle inputs must not be larger than the subvector.
    if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) {
          return SubVT.getFixedSizeInBits() <
                 SubInput.getValueSizeInBits().getFixedSize();
        }))
      return false;

    // Rescale the submask / outer mask so both use the same element width.
    if (SubMask.size() != NumSubElts) {
      assert(((SubMask.size() % NumSubElts) == 0 ||
              (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
      if ((NumSubElts % SubMask.size()) == 0) {
        int Scale = NumSubElts / SubMask.size();
        SmallVector<int,64> ScaledSubMask;
        narrowShuffleMaskElts(Scale, SubMask, ScaledSubMask);
        SubMask = ScaledSubMask;
      } else {
        int Scale = SubMask.size() / NumSubElts;
        NumSubElts = SubMask.size();
        NumElts *= Scale;
        InsertIdx *= Scale;
      }
    }
    Ops.push_back(Src);
    Ops.append(SubInputs.begin(), SubInputs.end());
    // Base mask: zero if Src is an all-zeros vector, else identity of Src.
    if (ISD::isBuildVectorAllZeros(Src.getNode()))
      Mask.append(NumElts, SM_SentinelZero);
    else
      for (int i = 0; i != (int)NumElts; ++i)
        Mask.push_back(i);
    // Splice in the subvector's mask, remapping its input indices past
    // Src's range (Ops[0]) into the appended SubInputs operands.
    for (int i = 0; i != (int)NumSubElts; ++i) {
      int M = SubMask[i];
      if (0 <= M) {
        int InputIdx = M / NumSubElts;
        M = (NumElts * (1 + InputIdx)) + (M % NumSubElts);
      }
      Mask[i + InsertIdx] = M;
    }
    return true;
  }
  case X86ISD::PINSRB:
  case X86ISD::PINSRW:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::INSERT_VECTOR_ELT: {
    // Match scalar insertions where the scalar was itself extracted from a
    // same-sized vector - this makes the whole node a byte-level shuffle.
    SDValue Scl = N.getOperand(Opcode == ISD::SCALAR_TO_VECTOR ? 0 : 1);

    unsigned DstIdx = 0;
    if (Opcode != ISD::SCALAR_TO_VECTOR) {
      // Require an in-range constant insertion index.
      if (!isa<ConstantSDNode>(N.getOperand(2)) ||
          N.getConstantOperandAPInt(2).uge(NumElts))
        return false;
      DstIdx = N.getConstantOperandVal(2);

      // Inserting a zero scalar: identity mask with a zero at DstIdx.
      if (X86::isZeroNode(Scl)) {
        Ops.push_back(N.getOperand(0));
        for (unsigned i = 0; i != NumElts; ++i)
          Mask.push_back(i == DstIdx ? SM_SentinelZero : (int)i);
        return true;
      }
    }

    // Peek through trunc/aext/zext on the scalar, tracking the narrowest
    // width seen so we only forward bytes that actually survive.
    unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits();
    while (Scl.getOpcode() == ISD::TRUNCATE ||
           Scl.getOpcode() == ISD::ANY_EXTEND ||
           Scl.getOpcode() == ISD::ZERO_EXTEND) {
      Scl = Scl.getOperand(0);
      MinBitsPerElt =
          std::min<unsigned>(MinBitsPerElt, Scl.getScalarValueSizeInBits());
    }
    if ((MinBitsPerElt % 8) != 0)
      return false;

    // The scalar must come from an element extract of a same-sized vector.
    SDValue SrcExtract;
    if ((Scl.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
         Scl.getOpcode() == X86ISD::PEXTRW ||
         Scl.getOpcode() == X86ISD::PEXTRB) &&
        Scl.getOperand(0).getValueSizeInBits() == NumSizeInBits) {
      SrcExtract = Scl;
    }
    if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
      return false;

    SDValue SrcVec = SrcExtract.getOperand(0);
    EVT SrcVT = SrcVec.getValueType();
    if (!SrcVT.getScalarType().isByteSized())
      return false;
    unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
    unsigned SrcByte = SrcIdx * (SrcVT.getScalarSizeInBits() / 8);
    unsigned DstByte = DstIdx * NumBytesPerElt;
    MinBitsPerElt =
        std::min<unsigned>(MinBitsPerElt, SrcVT.getScalarSizeInBits());

    // Build the base byte mask: SCALAR_TO_VECTOR leaves the rest undef,
    // insertion keeps the bytes of the original vector (Ops[1]).
    if (Opcode == ISD::SCALAR_TO_VECTOR) {
      Ops.push_back(SrcVec);
      Mask.append(NumSizeInBytes, SM_SentinelUndef);
    } else {
      Ops.push_back(SrcVec);
      Ops.push_back(N.getOperand(0));
      for (int i = 0; i != (int)NumSizeInBytes; ++i)
        Mask.push_back(NumSizeInBytes + i);
    }

    // Copy the surviving bytes of the extracted scalar; any bytes of the
    // destination element beyond the scalar's width are zero (zext case).
    unsigned MinBytesPerElts = MinBitsPerElt / 8;
    MinBytesPerElts = std::min(MinBytesPerElts, NumBytesPerElt);
    for (unsigned i = 0; i != MinBytesPerElts; ++i)
      Mask[DstByte + i] = SrcByte + i;
    for (unsigned i = MinBytesPerElts; i < NumBytesPerElt; ++i)
      Mask[DstByte + i] = SM_SentinelZero;
    return true;
  }
  case X86ISD::PACKSS:
  case X86ISD::PACKUS: {
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
           N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
           "Unexpected input value type");

    APInt EltsLHS, EltsRHS;
    getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);

    // If we know input saturation won't happen, we can treat this as a
    // truncation shuffle of the (possibly offset) sub-elements.
    bool Offset0 = false, Offset1 = false;
    if (Opcode == X86ISD::PACKSS) {
      // PACKSS saturates unless the inputs are already in signed range,
      // i.e. have more sign bits than the narrow element width.
      if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
           DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
          (!(N1.isUndef() || EltsRHS.isNullValue()) &&
           DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
        return false;

      // A VSRAI by the full element width splats the sign bit into the low
      // half - peek through it and offset the mask to take the upper
      // sub-element of the pre-shift source instead.
      if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) &&
          N0.getConstantOperandAPInt(1) == NumBitsPerElt) {
        Offset0 = true;
        N0 = N0.getOperand(0);
      }
      if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) &&
          N1.getConstantOperandAPInt(1) == NumBitsPerElt) {
        Offset1 = true;
        N1 = N1.getOperand(0);
      }
    } else {
      // PACKUS saturates unless the upper half of every source element is
      // known zero.
      APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
      if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
           !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
          (!(N1.isUndef() || EltsRHS.isNullValue()) &&
           !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
        return false;
    }

    bool IsUnary = (N0 == N1);

    Ops.push_back(N0);
    if (!IsUnary)
      Ops.push_back(N1);

    createPackShuffleMask(VT, Mask, IsUnary);

    // Redirect offset inputs to the odd (upper) sub-elements.
    if (Offset0 || Offset1) {
      for (int &M : Mask)
        if ((Offset0 && isInRange(M, 0, NumElts)) ||
            (Offset1 && isInRange(M, NumElts, 2 * NumElts)))
          ++M;
    }
    return true;
  }
  case X86ISD::VTRUNC: {
    SDValue Src = N.getOperand(0);
    EVT SrcVT = Src.getValueType();
    // Truncated source must be simple, 128-bit aligned and byte-sized.
    if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
        (SrcVT.getScalarSizeInBits() % 8) != 0)
      return false;
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    unsigned NumBitsPerSrcElt = SrcVT.getScalarSizeInBits();
    unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt;
    assert((NumBitsPerSrcElt % NumBitsPerElt) == 0 && "Illegal truncation");
    // Keep every Scale'th element; upper elements of the result are zero.
    for (unsigned i = 0; i != NumSrcElts; ++i)
      Mask.push_back(i * Scale);
    Mask.append(NumElts - NumSrcElts, SM_SentinelZero);
    Ops.push_back(Src);
    return true;
  }
  case X86ISD::VSHLI:
  case X86ISD::VSRLI: {
    uint64_t ShiftVal = N.getConstantOperandVal(1);
    // Out-of-range shifts zero the whole vector.
    if (NumBitsPerElt <= ShiftVal) {
      Mask.append(NumElts, SM_SentinelZero);
      return true;
    }

    // Only byte-aligned shifts can be expressed as byte shuffles.
    if ((ShiftVal % 8) != 0)
      break;

    uint64_t ByteShift = ShiftVal / 8;
    Ops.push_back(N.getOperand(0));

    // Start all-zero; vacated bytes stay zero.
    Mask.append(NumSizeInBytes, SM_SentinelZero);

    // Within each element, shift the surviving bytes left or right.
    if (X86ISD::VSHLI == Opcode) {
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j] = i + j - ByteShift;
    } else {
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j - ByteShift] = i + j;
    }
    return true;
  }
  case X86ISD::VROTLI:
  case X86ISD::VROTRI: {
    // Byte-aligned per-element rotations decode as byte shuffles.
    uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
    if ((RotateVal % 8) != 0)
      return false;
    Ops.push_back(N.getOperand(0));
    int Offset = RotateVal / 8;
    // A left rotate by K bytes is a right rotate by (EltBytes - K) bytes.
    Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset);
    for (int i = 0; i != (int)NumElts; ++i) {
      int BaseIdx = i * NumBytesPerElt;
      for (int j = 0; j != (int)NumBytesPerElt; ++j) {
        Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt));
      }
    }
    return true;
  }
  case X86ISD::VBROADCAST: {
    SDValue Src = N.getOperand(0);
    // A broadcast of a scalar extracted from element 0 of a matching-scalar
    // vector can use that vector directly; other scalars can't be decoded.
    if (!Src.getSimpleValueType().isVector()) {
      if (Src.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          !isNullConstant(Src.getOperand(1)) ||
          Src.getOperand(0).getValueType().getScalarType() !=
              VT.getScalarType())
        return false;
      Src = Src.getOperand(0);
    }
    Ops.push_back(Src);
    // Splat of element 0.
    Mask.append(NumElts, 0);
    return true;
  }
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    SDValue Src = N.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // Extended source must be simple, 128-bit aligned and byte-sized.
    if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
        (SrcVT.getScalarSizeInBits() % 8) != 0)
      return false;

    bool IsAnyExtend =
        (ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
    DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
                         IsAnyExtend, Mask);
    Ops.push_back(Src);
    return true;
  }
  }

  return false;
}
8129 | |
8130 | |
// Removes unused, undef and repeated shuffle source inputs, adjusting the
// mask indices in place so they reference the compacted Inputs list.
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
                                              SmallVectorImpl<int> &Mask) {
  int MaskWidth = Mask.size();
  SmallVector<SDValue, 16> UsedInputs;
  for (int i = 0, e = Inputs.size(); i < e; ++i) {
    // Input i currently occupies mask range [lo, hi) given the inputs
    // already kept in UsedInputs.
    int lo = UsedInputs.size() * MaskWidth;
    int hi = lo + MaskWidth;

    // Any references to an undef input become undef mask elements.
    if (Inputs[i].isUndef())
      for (int &M : Mask)
        if ((lo <= M) && (M < hi))
          M = SM_SentinelUndef;

    // If no mask element references this input, drop it and shift all
    // later references down by one input's width.
    if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
      for (int &M : Mask)
        if (lo <= M)
          M -= MaskWidth;
      continue;
    }

    // If this input duplicates one we already kept, redirect references to
    // the earlier copy and shift later references down.
    bool IsRepeat = false;
    for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
      if (UsedInputs[j] != Inputs[i])
        continue;
      for (int &M : Mask)
        if (lo <= M)
          M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
      IsRepeat = true;
      break;
    }
    if (IsRepeat)
      continue;

    // First use of this input - keep it.
    UsedInputs.push_back(Inputs[i]);
  }
  Inputs = UsedInputs;
}
8171 | |
8172 | |
8173 | |
8174 | |
8175 | static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, |
8176 | SmallVectorImpl<SDValue> &Inputs, |
8177 | SmallVectorImpl<int> &Mask, |
8178 | APInt &KnownUndef, APInt &KnownZero, |
8179 | const SelectionDAG &DAG, unsigned Depth, |
8180 | bool ResolveKnownElts) { |
8181 | EVT VT = Op.getValueType(); |
8182 | if (!VT.isSimple() || !VT.isVector()) |
8183 | return false; |
8184 | |
8185 | if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) { |
8186 | if (ResolveKnownElts) |
8187 | resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero); |
8188 | return true; |
8189 | } |
8190 | if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, |
8191 | ResolveKnownElts)) { |
8192 | resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); |
8193 | return true; |
8194 | } |
8195 | return false; |
8196 | } |
8197 | |
8198 | static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs, |
8199 | SmallVectorImpl<int> &Mask, |
8200 | const SelectionDAG &DAG, unsigned Depth = 0, |
8201 | bool ResolveKnownElts = true) { |
8202 | EVT VT = Op.getValueType(); |
8203 | if (!VT.isSimple() || !VT.isVector()) |
8204 | return false; |
8205 | |
8206 | APInt KnownUndef, KnownZero; |
8207 | unsigned NumElts = Op.getValueType().getVectorNumElements(); |
8208 | APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
8209 | return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef, |
8210 | KnownZero, DAG, Depth, ResolveKnownElts); |
8211 | } |
8212 | |
8213 | |
8214 | static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT, |
8215 | EVT MemVT, MemSDNode *Mem, unsigned Offset, |
8216 | SelectionDAG &DAG) { |
8217 | assert((Opcode == X86ISD::VBROADCAST_LOAD || |
8218 | Opcode == X86ISD::SUBV_BROADCAST_LOAD) && |
8219 | "Unknown broadcast load type"); |
8220 | |
8221 | |
8222 | if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal()) |
8223 | return SDValue(); |
8224 | |
8225 | SDValue Ptr = |
8226 | DAG.getMemBasePlusOffset(Mem->getBasePtr(), TypeSize::Fixed(Offset), DL); |
8227 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
8228 | SDValue Ops[] = {Mem->getChain(), Ptr}; |
8229 | SDValue BcstLd = DAG.getMemIntrinsicNode( |
8230 | Opcode, DL, Tys, Ops, MemVT, |
8231 | DAG.getMachineFunction().getMachineMemOperand( |
8232 | Mem->getMemOperand(), Offset, MemVT.getStoreSize())); |
8233 | DAG.makeEquivalentMemoryOrdering(SDValue(Mem, 1), BcstLd.getValue(1)); |
8234 | return BcstLd; |
8235 | } |
8236 | |
8237 | |
8238 | |
/// Returns the scalar element that will make up the Index'th element of the
/// result of the vector shuffle, or SDValue() if it can't be determined.
/// Recurses through shuffles, subvector ops, bitcasts and insertions.
static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
                                   SelectionDAG &DAG, unsigned Depth) {
  // Bound the recursion.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  unsigned NumElems = VT.getVectorNumElements();

  // Recurse into ISD::VECTOR_SHUFFLE nodes.
  if (auto *SV = dyn_cast<ShuffleVectorSDNode>(Op)) {
    int Elt = SV->getMaskElt(Index);

    if (Elt < 0)
      return DAG.getUNDEF(VT.getVectorElementType());

    // Mask indices >= NumElems select from the second operand.
    SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1);
    return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1);
  }

  // Recurse into target-specific shuffle nodes.
  if (isTargetShuffle(Opcode)) {
    MVT ShufVT = VT.getSimpleVT();
    MVT ShufSVT = ShufVT.getVectorElementType();
    int NumElems = (int)ShufVT.getVectorNumElements();
    SmallVector<int, 16> ShuffleMask;
    SmallVector<SDValue, 16> ShuffleOps;
    if (!getTargetShuffleMask(Op.getNode(), ShufVT, true, ShuffleOps,
                              ShuffleMask))
      return SDValue();

    // Sentinel elements resolve directly to zero/undef scalars.
    int Elt = ShuffleMask[Index];
    if (Elt == SM_SentinelZero)
      return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(Op), ShufSVT)
                                 : DAG.getConstantFP(+0.0, SDLoc(Op), ShufSVT);
    if (Elt == SM_SentinelUndef)
      return DAG.getUNDEF(ShufSVT);

    assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range");
    SDValue Src = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
    return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1);
  }

  // Recurse into insert_subvector: pick the subvector if Index falls inside
  // the inserted range, otherwise the base vector.
  if (Opcode == ISD::INSERT_SUBVECTOR) {
    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t SubIdx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();

    if (SubIdx <= Index && Index < (SubIdx + NumSubElts))
      return getShuffleScalarElt(Sub, Index - SubIdx, DAG, Depth + 1);
    return getShuffleScalarElt(Vec, Index, DAG, Depth + 1);
  }

  // Recurse into concat_vectors: select the operand containing Index.
  if (Opcode == ISD::CONCAT_VECTORS) {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    uint64_t SubIdx = Index / NumSubElts;
    uint64_t SubElt = Index % NumSubElts;
    return getShuffleScalarElt(Op.getOperand(SubIdx), SubElt, DAG, Depth + 1);
  }

  // Recurse into extract_subvector by offsetting the index into the source.
  if (Opcode == ISD::EXTRACT_SUBVECTOR) {
    SDValue Src = Op.getOperand(0);
    uint64_t SrcIdx = Op.getConstantOperandVal(1);
    return getShuffleScalarElt(Src, Index + SrcIdx, DAG, Depth + 1);
  }

  // Peek through bitcasts that don't change the element count (the scalar
  // type may differ, but element Index still maps 1:1).
  if (Opcode == ISD::BITCAST) {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (SrcVT.isVector() && SrcVT.getVectorNumElements() == NumElems)
      return getShuffleScalarElt(Src, Index, DAG, Depth + 1);
    return SDValue();
  }

  // insert_vector_elt with a constant index: take the inserted scalar if it
  // lands on Index, otherwise look through to the base vector.
  if (Opcode == ISD::INSERT_VECTOR_ELT &&
      isa<ConstantSDNode>(Op.getOperand(2))) {
    if (Op.getConstantOperandAPInt(2) == Index)
      return Op.getOperand(1);
    return getShuffleScalarElt(Op.getOperand(0), Index, DAG, Depth + 1);
  }

  // scalar_to_vector defines only element 0; the rest are undef.
  if (Opcode == ISD::SCALAR_TO_VECTOR)
    return (Index == 0) ? Op.getOperand(0)
                        : DAG.getUNDEF(VT.getVectorElementType());

  // build_vector provides the element directly.
  if (Opcode == ISD::BUILD_VECTOR)
    return Op.getOperand(Index);

  return SDValue();
}
8339 | |
8340 | |
// Lower a BUILD_VECTOR as a sequence of INSERT_VECTOR_ELT operations into a
// zero vector (or, when the first element is non-zero and there are no zero
// elements, into a scalar_to_vector seed). NonZeroMask marks the non-zero
// operands; NumZero is the count of known-zero (non-undef) elements.
static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask,
                                        unsigned NumNonZero, unsigned NumZero,
                                        SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  // Callers only use this for types with a legal element insertion:
  // v8i16 needs SSE2 (PINSRW); v16i8/v4i32 need SSE4.1 (PINSRB/PINSRD).
  assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) ||
          ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&
         "Illegal vector insertion");

  SDLoc dl(Op);
  SDValue V;
  bool First = true;

  for (unsigned i = 0; i < NumElts; ++i) {
    bool IsNonZero = NonZeroMask[i];
    if (!IsNonZero)
      continue;

    // On the first non-zero element, pick the accumulator seed: a zero
    // vector if any element must be zero or the first insertion isn't at
    // index 0; otherwise start from a scalar_to_vector of element 0 (the
    // remaining lanes are undef/don't-care) and skip the insert.
    if (First) {
      First = false;
      if (NumZero || 0 != i)
        V = getZeroVector(VT, Subtarget, DAG, dl);
      else {
        assert(0 == i && "Expected insertion into zero-index");
        V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
        V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
        V = DAG.getBitcast(VT, V);
        continue;
      }
    }
    V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i),
                    DAG.getIntPtrConstant(i, dl));
  }

  return V;
}
8381 | |
8382 | |
// Custom lower a v16i8 BUILD_VECTOR. With SSE4.1, use PINSRB insertions.
// Without it, combine byte pairs into i16 values and build a v8i16 with
// PINSRW, bitcasting the result back to v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  // Too many insertions without PINSRB - not worth it.
  if (NumNonZero > 8 && !Subtarget.hasSSE41())
    return SDValue();

  // SSE4.1: direct byte insertions.
  if (Subtarget.hasSSE41())
    return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
                                    Subtarget);

  SDLoc dl(Op);
  SDValue V;

  // Pre-SSE4.1: process the bytes two at a time as one i16 lane.
  for (unsigned i = 0; i < 16; i += 2) {
    bool ThisIsNonZero = NonZeroMask[i];
    bool NextIsNonZero = NonZeroMask[i + 1];
    if (!ThisIsNonZero && !NextIsNonZero)
      continue;

    // Build the i32 value holding the pair: low byte must be zero-extended
    // whenever its upper bits matter (a zero vector base or a non-zero high
    // byte to OR in); otherwise an any-extend suffices.
    SDValue Elt;
    if (ThisIsNonZero) {
      if (NumZero || NextIsNonZero)
        Elt = DAG.getZExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
      else
        Elt = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
    }

    if (NextIsNonZero) {
      SDValue NextElt = Op.getOperand(i + 1);
      // Only the first pair of a partially-zero vector needs the high byte
      // fully zero-extended (it seeds the whole register below).
      if (i == 0 && NumZero)
        NextElt = DAG.getZExtOrTrunc(NextElt, dl, MVT::i32);
      else
        NextElt = DAG.getAnyExtOrTrunc(NextElt, dl, MVT::i32);
      NextElt = DAG.getNode(ISD::SHL, dl, MVT::i32, NextElt,
                            DAG.getConstant(8, dl, MVT::i8));
      if (ThisIsNonZero)
        Elt = DAG.getNode(ISD::OR, dl, MVT::i32, NextElt, Elt);
      else
        Elt = NextElt;
    }

    // Seed the accumulator on first use: a zero vector if any lane must be
    // zero or we're past lane 0, else scalar_to_vector of this pair.
    if (!V) {
      if (i != 0 || NumZero)
        V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
      else {
        V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt);
        V = DAG.getBitcast(MVT::v8i16, V);
        continue;
      }
    }
    Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Elt);
    V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Elt,
                    DAG.getIntPtrConstant(i / 2, dl));
  }

  return DAG.getBitcast(MVT::v16i8, V);
}
8447 | |
8448 | |
// Custom lower a v8i16 BUILD_VECTOR as a sequence of PINSRW insertions.
static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  // More than 4 insertions isn't worth it without SSE4.1.
  if (NumNonZero > 4 && !Subtarget.hasSSE41())
    return SDValue();

  // v8i16 insertion (PINSRW) is available from SSE2 onwards.
  return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
                                  Subtarget);
}
8460 | |
8461 | |
// Custom lower a 4-element (x32) BUILD_VECTOR with at least two non-zero
// elements, using MOVDDUP, a vector shuffle, or INSERTPS.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  // Pattern (A, B, A, B) with A != B: build (A, B, undef, undef) and
  // duplicate the low 64 bits with MOVDDUP. Skipped on XOP targets
  // (presumably a better lowering exists there - confirm against the
  // XOP shuffle combines).
  if (Subtarget.hasSSE3() && !Subtarget.hasXOP() &&
      Op.getOperand(0) == Op.getOperand(2) &&
      Op.getOperand(1) == Op.getOperand(3) &&
      Op.getOperand(0) != Op.getOperand(1)) {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    MVT EltVT = VT.getVectorElementType();

    // Create a new build vector with the first 2 elements followed by undef
    // so the upper 64-bits don't constrain the lowering.
    SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
                       DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
    SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
    SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
    return DAG.getBitcast(VT, Dup);
  }

  // Classify each element as undef and/or zeroable.
  std::bitset<4> Zeroable, Undefs;
  for (int i = 0; i < 4; ++i) {
    SDValue Elt = Op.getOperand(i);
    Undefs[i] = Elt.isUndef();
    Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
  }
  assert(Zeroable.size() - Zeroable.count() > 1 &&
         "We expect at least two non-zero elements!");

  // Every non-zero element must be an EXTRACT_VECTOR_ELT with a constant
  // index from some 128-bit vector. Remember the first one found.
  SDValue FirstNonZero;
  unsigned FirstNonZeroIdx;
  for (unsigned i = 0; i < 4; ++i) {
    if (Zeroable[i])
      continue;
    SDValue Elt = Op.getOperand(i);
    if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Elt.getOperand(1)))
      return SDValue();

    // Make sure that this node is extracting from a 128-bit vector.
    MVT VT = Elt.getOperand(0).getSimpleValueType();
    if (!VT.is128BitVector())
      return SDValue();
    if (!FirstNonZero.getNode()) {
      FirstNonZero = Elt;
      FirstNonZeroIdx = i;
    }
  }

  assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
  SDValue V1 = FirstNonZero.getOperand(0);
  MVT VT = V1.getSimpleValueType();

  // See if this build_vector can be lowered as a blend of V1 with zero:
  // each non-zero element must come from V1 at its own index.
  SDValue Elt;
  unsigned EltMaskIdx, EltIdx;
  int Mask[4];
  for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
    if (Zeroable[EltIdx]) {
      // Zeroable lanes take elements from the second (zero) shuffle input.
      Mask[EltIdx] = EltIdx+4;
      continue;
    }

    Elt = Op->getOperand(EltIdx);
    // By construction, Elt.getOperand(1) is a constant (checked above).
    EltMaskIdx = Elt.getConstantOperandVal(1);
    if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
      break;
    Mask[EltIdx] = EltIdx;
  }

  if (EltIdx == 4) {
    // All lanes matched: shuffle V1 with a zero vector (or undef if every
    // zeroable lane was actually undef).
    SDValue VZeroOrUndef = (Zeroable == Undefs)
                               ? DAG.getUNDEF(VT)
                               : getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
    if (V1.getSimpleValueType() != VT)
      V1 = DAG.getBitcast(VT, V1);
    return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask);
  }

  // The blend form failed at lane EltIdx - fall back to INSERTPS (SSE4.1).
  if (!Subtarget.hasSSE41())
    return SDValue();

  // V2 supplies the one mismatching element. If the mismatch was the first
  // non-zero element, V1 is still unknown and will be found below.
  SDValue V2 = Elt.getOperand(0);
  if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
    V1 = SDValue();

  // All remaining non-zero lanes must come from a single vector V1 at their
  // own indices, otherwise a single INSERTPS can't express the result.
  bool CanFold = true;
  for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
    if (Zeroable[i])
      continue;

    SDValue Current = Op->getOperand(i);
    SDValue SrcVector = Current->getOperand(0);
    if (!V1.getNode())
      V1 = SrcVector;
    CanFold = (SrcVector == V1) && (Current.getConstantOperandAPInt(1) == i);
  }

  if (!CanFold)
    return SDValue();

  assert(V1.getNode() && "Expected at least two non-zero elements!");
  if (V1.getSimpleValueType() != MVT::v4f32)
    V1 = DAG.getBitcast(MVT::v4f32, V1);
  if (V2.getSimpleValueType() != MVT::v4f32)
    V2 = DAG.getBitcast(MVT::v4f32, V2);

  // Ok, we can emit an INSERTPS instruction.
  unsigned ZMask = Zeroable.to_ulong();

  // INSERTPS immediate: source element (bits 7:6), destination element
  // (bits 5:4), zero mask (bits 3:0).
  unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
  assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
  SDLoc DL(Op);
  SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
                               DAG.getIntPtrConstant(InsertPSMask, DL, true));
  return DAG.getBitcast(VT, Result);
}
8587 | |
8588 | |
8589 | static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, |
8590 | SelectionDAG &DAG, const TargetLowering &TLI, |
8591 | const SDLoc &dl) { |
8592 | assert(VT.is128BitVector() && "Unknown type for VShift"); |
8593 | MVT ShVT = MVT::v16i8; |
8594 | unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; |
8595 | SrcOp = DAG.getBitcast(ShVT, SrcOp); |
8596 | assert(NumBits % 8 == 0 && "Only support byte sized shifts"); |
8597 | SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8); |
8598 | return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); |
8599 | } |
8600 | |
/// Try to form a splat of a scalar (i32/f32) stack-slot load by widening it
/// to a vector load of the surrounding stack memory and shuffling the wanted
/// lane into every element. This avoids a scalar-to-vector domain crossing.
static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Only handle simple (non-volatile/atomic), non-extending scalar loads of
  // i32 or f32 whose base address we can reason about.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
    SDValue Ptr = LD->getBasePtr();
    if (!ISD::isNormalLoad(LD) || !LD->isSimple())
      return SDValue();
    EVT PVT = LD->getValueType(0);
    if (PVT != MVT::i32 && PVT != MVT::f32)
      return SDValue();

    // The address must be a frame index, possibly plus a constant offset;
    // otherwise we cannot adjust the object's alignment below.
    int FI = -1;
    int64_t Offset = 0;
    if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
      FI = FINode->getIndex();
      Offset = 0;
    } else if (DAG.isBaseWithConstantOffset(Ptr) &&
               isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
      FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
      Offset = Ptr.getConstantOperandVal(1);
      Ptr = Ptr.getOperand(0);
    } else {
      return SDValue();
    }

    // The widened load must be naturally aligned for the full vector type.
    Align RequiredAlign(VT.getSizeInBits() / 8);
    SDValue Chain = LD->getChain();
    // If the stack object is under-aligned, bump its alignment when we own
    // it; fixed objects (e.g. incoming arguments) cannot be realigned.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    MaybeAlign InferredAlign = DAG.InferPtrAlign(Ptr);
    if (!InferredAlign || *InferredAlign < RequiredAlign) {
      if (MFI.isFixedObjectIndex(FI)) {
        // Can't change the alignment of a fixed stack object - bail out.
        return SDValue();
      } else {
        MFI.setObjectAlignment(FI, RequiredAlign);
      }
    }

    // Round the offset down to the nearest vector-aligned boundary so the
    // wide load stays inside the (re)aligned object. The element itself must
    // be 4-byte aligned within the slot.
    if (Offset < 0)
      return SDValue();
    if ((Offset % RequiredAlign.value()) & 3)
      return SDValue();
    int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1);
    if (StartOffset) {
      SDLoc DL(Ptr);
      Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                        DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
    }

    // Lane index of the original scalar inside the wide load (elements are
    // 4 bytes wide: i32/f32).
    int EltNo = (Offset - StartOffset) >> 2;
    unsigned NumElems = VT.getVectorNumElements();

    // Load the whole vector worth of memory and splat the wanted lane.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
    SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
                             LD->getPointerInfo().getWithOffset(StartOffset));

    SmallVector<int, 8> Mask(NumElems, EltNo);

    return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
  }

  return SDValue();
}
8674 | |
8675 | |
/// Trace \p Elt back to a simple, non-extending scalar load, looking through
/// bitcasts, truncations, scalar_to_vector, byte-aligned SRLs and constant
/// extract_vector_elt. On success, \p Ld is set to the source load and
/// \p ByteOffset to the byte offset of the element's data within the loaded
/// value. Returns false if no such load can be identified.
static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
  if (ISD::isNON_EXTLoad(Elt.getNode())) {
    auto *BaseLd = cast<LoadSDNode>(Elt);
    // Volatile/atomic loads must not be merged.
    if (!BaseLd->isSimple())
      return false;
    Ld = BaseLd;
    ByteOffset = 0;
    return true;
  }

  switch (Elt.getOpcode()) {
  case ISD::BITCAST:
  case ISD::TRUNCATE:
  case ISD::SCALAR_TO_VECTOR:
    // These keep the low bytes in place - recurse with the same offset.
    return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset);
  case ISD::SRL:
    // A byte-aligned logical right shift selects data further into the
    // loaded value: add the shifted-out bytes to the offset.
    if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
      uint64_t Idx = IdxC->getZExtValue();
      if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
        ByteOffset += Idx / 8;
        return true;
      }
    }
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    // Extracting a same-width element at a constant index offsets into the
    // loaded vector by whole elements.
    if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
      SDValue Src = Elt.getOperand(0);
      unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
      unsigned DstSizeInBits = Elt.getScalarValueSizeInBits();
      if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
          findEltLoadSrc(Src, Ld, ByteOffset)) {
        uint64_t Idx = IdxC->getZExtValue();
        ByteOffset += Idx * (SrcSizeInBits / 8);
        return true;
      }
    }
    break;
  }

  return false;
}
8717 | |
8718 | |
8719 | |
8720 | |
8721 | |
8722 | |
/// Given the initializing elements 'Elts' of a build vector of type 'VT', see
/// if the elements can be replaced by a single (wide) load, a load + zeroing
/// shuffle, a VZEXT_LOAD, or a broadcast of a repeated load sequence. The
/// elements may be loads, zeros or undef; identified loads must be
/// consecutive in memory.
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
                                        const SDLoc &DL, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget,
                                        bool IsAfterLegalize) {
  // Byte-offset bookkeeping below requires byte-sized elements.
  if ((VT.getScalarSizeInBits() % 8) != 0)
    return SDValue();

  unsigned NumElems = Elts.size();

  int LastLoadedElt = -1;
  APInt LoadMask = APInt::getNullValue(NumElems);
  APInt ZeroMask = APInt::getNullValue(NumElems);
  APInt UndefMask = APInt::getNullValue(NumElems);

  SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
  SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);

  // Classify each element as undef, zero or (traced-back) load, recording the
  // source load and the byte offset of the element within that load.
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = peekThroughBitcasts(Elts[i]);
    if (!Elt.getNode())
      return SDValue();
    if (Elt.isUndef()) {
      UndefMask.setBit(i);
      continue;
    }
    if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode())) {
      ZeroMask.setBit(i);
      continue;
    }

    // Each loaded element must be the correct fractional portion of the
    // requested vector width.
    unsigned EltSizeInBits = Elt.getValueSizeInBits();
    if ((NumElems * EltSizeInBits) != VT.getSizeInBits())
      return SDValue();

    // The element's bits must come entirely from within its source load.
    if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0)
      return SDValue();
    unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0);
    if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits)
      return SDValue();

    LoadMask.setBit(i);
    LastLoadedElt = i;
  }
  assert((ZeroMask.countPopulation() + UndefMask.countPopulation() +
          LoadMask.countPopulation()) == NumElems &&
         "Incomplete element masks");

  // Handle the all-undef and all-zero/undef cases early.
  if (UndefMask.countPopulation() == NumElems)
    return DAG.getUNDEF(VT);
  if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems)
    return VT.isInteger() ? DAG.getConstant(0, DL, VT)
                          : DAG.getConstantFP(0.0, DL, VT);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  int FirstLoadedElt = LoadMask.countTrailingZeros();
  SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
  EVT EltBaseVT = EltBase.getValueType();
  assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
         "Register/Memory size mismatch");
  LoadSDNode *LDBase = Loads[FirstLoadedElt];
  assert(LDBase && "Did not find base load for merging consecutive loads");
  unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
  unsigned BaseSizeInBytes = BaseSizeInBits / 8;
  int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt);
  int LoadSizeInBits = NumLoadedElts * BaseSizeInBits;
  assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");

  // TODO: Support offsetting the base load - the first element must currently
  // start exactly at its source load's address.
  if (ByteOffsets[FirstLoadedElt] != 0)
    return SDValue();

  // Check if element EltIdx's load is consecutive to the base load. Either it
  // re-uses bytes of an earlier element's load at the matching offset, or
  // it is a distinct load at the expected consecutive address.
  auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
    LoadSDNode *Ld = Loads[EltIdx];
    int64_t ByteOffset = ByteOffsets[EltIdx];
    if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
      // Element is a whole-element-aligned slice of an earlier wide load.
      int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
      return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] &&
              Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0);
    }
    return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes,
                                              EltIdx - FirstLoadedElt);
  };

  // IsConsecutiveLoad: every element between first and last is a consecutive
  // load. IsConsecutiveLoadWithZeros: as above, but zeros are also allowed in
  // between.
  bool IsConsecutiveLoad = true;
  bool IsConsecutiveLoadWithZeros = true;
  for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
    if (LoadMask[i]) {
      if (!CheckConsecutiveLoad(LDBase, i)) {
        IsConsecutiveLoad = false;
        IsConsecutiveLoadWithZeros = false;
        break;
      }
    } else if (ZeroMask[i]) {
      IsConsecutiveLoad = false;
    }
  }

  // Build a wide load from LDBase and make all original loads' chain users
  // depend on it.
  auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
    auto MMOFlags = LDBase->getMemOperand()->getFlags();
    assert(LDBase->isSimple() &&
           "Cannot merge volatile or atomic loads.");
    SDValue NewLd =
        DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
                    LDBase->getPointerInfo(), LDBase->getOriginalAlign(),
                    MMOFlags);
    for (auto *LD : Loads)
      if (LD)
        DAG.makeEquivalentMemoryOrdering(LD, NewLd);
    return NewLd;
  };

  // A wide load is safe if it covers the whole vector or the base pointer is
  // known dereferenceable for the full vector width.
  bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable(
      VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout());

  // LOAD - all consecutive load/undefs (must start at the first element):
  // replace the element sequence with a single wide vector load. Trailing
  // zero elements are handled by a zeroing shuffle after the load.
  if (FirstLoadedElt == 0 &&
      (NumLoadedElts == (int)NumElems || IsDereferenceable) &&
      (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
    if (IsAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
      return SDValue();

    // Don't create 256-bit non-temporal aligned loads without AVX2 as these
    // will lower to regular temporal loads and use the cache.
    if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
        VT.is256BitVector() && !Subtarget.hasInt256())
      return SDValue();

    if (NumElems == 1)
      return DAG.getBitcast(VT, Elts[FirstLoadedElt]);

    if (!ZeroMask)
      return CreateLoad(VT, LDBase);

    // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
    // vector and a zero vector to clear out the zero elements.
    if (!IsAfterLegalize && VT.isVector()) {
      unsigned NumMaskElts = VT.getVectorNumElements();
      if ((NumMaskElts % NumElems) == 0) {
        unsigned Scale = NumMaskElts / NumElems;
        SmallVector<int, 4> ClearMask(NumMaskElts, -1);
        for (unsigned i = 0; i < NumElems; ++i) {
          if (UndefMask[i])
            continue;
          // Zero elements select from the zero vector (second operand).
          int Offset = ZeroMask[i] ? NumMaskElts : 0;
          for (unsigned j = 0; j != Scale; ++j)
            ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset;
        }
        SDValue V = CreateLoad(VT, LDBase);
        SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                   : DAG.getConstantFP(0.0, DL, VT);
        return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
      }
    }
  }

  // If the upper half of a ymm/zmm load is undef then just load the lower
  // half and insert it into an undef wide vector.
  if (VT.is256BitVector() || VT.is512BitVector()) {
    unsigned HalfNumElems = NumElems / 2;
    if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) {
      EVT HalfVT =
          EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
      SDValue HalfLD =
          EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL,
                                   DAG, Subtarget, IsAfterLegalize);
      if (HalfLD)
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
                           HalfLD, DAG.getIntPtrConstant(0, DL));
    }
  }

  // VZEXT_LOAD - consecutive 16/32/64-bit load/undefs followed by zeros/undefs.
  if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
      ((LoadSizeInBits == 16 && Subtarget.hasFP16()) || LoadSizeInBits == 32 ||
       LoadSizeInBits == 64) &&
      ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
    MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
                                      : MVT::getIntegerVT(LoadSizeInBits);
    MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits);
    // Allow v4f32 on SSE1 only targets.
    // FIXME: Add more isel patterns so we can just use VT directly.
    if (!Subtarget.hasSSE2() && VT == MVT::v4f32)
      VecVT = MVT::v4f32;
    if (TLI.isTypeLegal(VecVT)) {
      SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
      SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
      SDValue ResNode = DAG.getMemIntrinsicNode(
          X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(),
          LDBase->getOriginalAlign(), MachineMemOperand::MOLoad);
      for (auto *LD : Loads)
        if (LD)
          DAG.makeEquivalentMemoryOrdering(LD, ResNode);
      return DAG.getBitcast(VT, ResNode);
    }
  }

  // BROADCAST - match the smallest possible repeated load pattern and emit a
  // broadcast of it (or a chain of concats for repeats wider than 64 bits).
  if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
      (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
    for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
      unsigned RepeatSize = SubElems * BaseSizeInBits;
      unsigned ScalarSize = std::min(RepeatSize, 64u);
      if (!Subtarget.hasAVX2() && ScalarSize < 32)
        continue;

      // Don't attempt a 1:N subvector broadcast - it should be caught by
      // combineConcatVectorOps, else will cause infinite loops.
      if (RepeatSize > ScalarSize && SubElems == 1)
        continue;

      // Check that all the (non-undef) elements repeat with period SubElems.
      bool Match = true;
      SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT));
      for (unsigned i = 0; i != NumElems && Match; ++i) {
        if (!LoadMask[i])
          continue;
        SDValue Elt = peekThroughBitcasts(Elts[i]);
        if (RepeatedLoads[i % SubElems].isUndef())
          RepeatedLoads[i % SubElems] = Elt;
        else
          Match &= (RepeatedLoads[i % SubElems] == Elt);
      }

      // We must have loads at both ends of the repetition.
      Match &= !RepeatedLoads.front().isUndef();
      Match &= !RepeatedLoads.back().isUndef();
      if (!Match)
        continue;

      EVT RepeatVT =
          VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64))
              ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize)
              : EVT::getFloatingPointVT(ScalarSize);
      if (RepeatSize > ScalarSize)
        RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT,
                                    RepeatSize / ScalarSize);
      EVT BroadcastVT =
          EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(),
                           VT.getSizeInBits() / ScalarSize);
      if (TLI.isTypeLegal(BroadcastVT)) {
        if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
                RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize)) {
          SDValue Broadcast = RepeatLoad;
          if (RepeatSize > ScalarSize) {
            // Widen a vector-sized repeat by repeatedly concatenating it.
            while (Broadcast.getValueSizeInBits() < VT.getSizeInBits())
              Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL);
          } else {
            // Without AVX2 the broadcast is only worthwhile if the load can
            // fold into the broadcast instruction itself.
            if (!Subtarget.hasAVX2() &&
                !MayFoldLoadIntoBroadcastFromMem(
                    RepeatLoad, RepeatVT.getScalarType().getSimpleVT(),
                    true))
              return SDValue();
            Broadcast =
                DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, RepeatLoad);
          }
          return DAG.getBitcast(VT, Broadcast);
        }
      }
    }
  }

  return SDValue();
}
9000 | |
9001 | |
9002 | |
9003 | |
9004 | static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL, |
9005 | SelectionDAG &DAG, |
9006 | const X86Subtarget &Subtarget, |
9007 | bool IsAfterLegalize) { |
9008 | SmallVector<SDValue, 64> Elts; |
9009 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { |
9010 | if (SDValue Elt = getShuffleScalarElt(Op, i, DAG, 0)) { |
9011 | Elts.push_back(Elt); |
9012 | continue; |
9013 | } |
9014 | return SDValue(); |
9015 | } |
9016 | assert(Elts.size() == VT.getVectorNumElements()); |
9017 | return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget, |
9018 | IsAfterLegalize); |
9019 | } |
9020 | |
9021 | static Constant *getConstantVector(MVT VT, const APInt &SplatValue, |
9022 | unsigned SplatBitSize, LLVMContext &C) { |
9023 | unsigned ScalarSize = VT.getScalarSizeInBits(); |
9024 | unsigned NumElm = SplatBitSize / ScalarSize; |
9025 | |
9026 | SmallVector<Constant *, 32> ConstantVec; |
9027 | for (unsigned i = 0; i < NumElm; i++) { |
9028 | APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); |
9029 | Constant *Const; |
9030 | if (VT.isFloatingPoint()) { |
9031 | if (ScalarSize == 16) { |
9032 | Const = ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val)); |
9033 | } else if (ScalarSize == 32) { |
9034 | Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); |
9035 | } else { |
9036 | assert(ScalarSize == 64 && "Unsupported floating point scalar size"); |
9037 | Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); |
9038 | } |
9039 | } else |
9040 | Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); |
9041 | ConstantVec.push_back(Const); |
9042 | } |
9043 | return ConstantVector::get(ArrayRef<Constant *>(ConstantVec)); |
9044 | } |
9045 | |
9046 | static bool isFoldableUseOfShuffle(SDNode *N) { |
9047 | for (auto *U : N->uses()) { |
9048 | unsigned Opc = U->getOpcode(); |
9049 | |
9050 | if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N) |
9051 | return false; |
9052 | if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N) |
9053 | return false; |
9054 | if (isTargetShuffle(Opc)) |
9055 | return true; |
9056 | if (Opc == ISD::BITCAST) |
9057 | return isFoldableUseOfShuffle(U); |
9058 | if (N->hasOneUse()) |
9059 | return true; |
9060 | } |
9061 | return false; |
9062 | } |
9063 | |
9064 | |
9065 | |
9066 | |
9067 | |
9068 | |
9069 | |
9070 | |
/// Attempt to use the vbroadcast instruction to generate a splat value
/// from a splat BUILD_VECTOR which uses:
///  a. A single scalar load, or a constant.
///  b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
                                           const X86Subtarget &Subtarget,
                                           SelectionDAG &DAG) {
  // VBROADCAST requires AVX.
  // TODO: Splats could be generated for non-AVX CPUs using SSE
  // instructions, but there's less potential gain for only 128-bit vectors.
  if (!Subtarget.hasAVX())
    return SDValue();

  MVT VT = BVOp->getSimpleValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc dl(BVOp);

  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
         "Unsupported vector type for broadcast.");

  // See if the build vector is a repeating sequence of scalars (inc. splat).
  SDValue Ld;
  BitVector UndefElements;
  SmallVector<SDValue, 16> Sequence;
  if (BVOp->getRepeatedSequence(Sequence, &UndefElements)) {
    assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit.");
    if (Sequence.size() == 1)
      Ld = Sequence[0];
  }

  // Attempt to use VBROADCASTM
  // From this pattern:
  // a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
  // b. t1 = (build_vector t0 t0)
  //
  // Create (VBROADCASTM v2i1 X)
  if (!Sequence.empty() && Subtarget.hasCDI()) {
    // If not a splat, are the upper sequence values zeroable?
    unsigned SeqLen = Sequence.size();
    bool UpperZeroOrUndef =
        SeqLen == 1 ||
        llvm::all_of(makeArrayRef(Sequence).drop_front(), [](SDValue V) {
          return !V || V.isUndef() || isNullConstant(V);
        });
    SDValue Op0 = Sequence[0];
    if (UpperZeroOrUndef && ((Op0.getOpcode() == ISD::BITCAST) ||
                             (Op0.getOpcode() == ISD::ZERO_EXTEND &&
                              Op0.getOperand(0).getOpcode() == ISD::BITCAST))) {
      SDValue BOperand = Op0.getOpcode() == ISD::BITCAST
                             ? Op0.getOperand(0)
                             : Op0.getOperand(0).getOperand(0);
      MVT MaskVT = BOperand.getSimpleValueType();
      MVT EltType = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
      if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) ||  // for broadcastmb2q
          (EltType == MVT::i32 && MaskVT == MVT::v16i1)) { // for broadcastmw2d
        MVT BcstVT = MVT::getVectorVT(EltType, NumElts / SeqLen);
        if (!VT.is512BitVector() && !Subtarget.hasVLX()) {
          // Without VLX, widen to a 512-bit broadcast and extract the result.
          unsigned Scale = 512 / VT.getSizeInBits();
          BcstVT = MVT::getVectorVT(EltType, Scale * (NumElts / SeqLen));
        }
        SDValue Bcst = DAG.getNode(X86ISD::VBROADCASTM, dl, BcstVT, BOperand);
        if (BcstVT.getSizeInBits() != VT.getSizeInBits())
          Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits());
        return DAG.getBitcast(VT, Bcst);
      }
    }
  }

  unsigned NumUndefElts = UndefElements.count();
  if (!Ld || (NumElts - NumUndefElts) <= 1) {
    APInt SplatValue, Undef;
    unsigned SplatBitSize;
    bool HasUndef;
    // Check if this is a repeated constant pattern suitable for broadcasting.
    if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
        SplatBitSize > VT.getScalarSizeInBits() &&
        SplatBitSize < VT.getSizeInBits()) {
      // Avoid replacing with broadcast when it's a use of a shuffle
      // instruction to preserve the present custom lowering of shuffles.
      if (isFoldableUseOfShuffle(BVOp))
        return SDValue();
      // replace BUILD_VECTOR with broadcast of the repeated constants.
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      LLVMContext *Ctx = DAG.getContext();
      MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
      if (Subtarget.hasAVX()) {
        if (SplatBitSize == 32 || SplatBitSize == 64 ||
            (SplatBitSize < 32 && Subtarget.hasAVX2())) {
          // Load the constant scalar/subvector and broadcast it.
          MVT CVT = MVT::getIntegerVT(SplatBitSize);
          Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
          Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
          SDValue CP = DAG.getConstantPool(C, PVT);
          unsigned Repeat = VT.getSizeInBits() / SplatBitSize;

          Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
          SDVTList Tys =
              DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other);
          SDValue Ops[] = {DAG.getEntryNode(), CP};
          MachinePointerInfo MPI =
              MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
          SDValue Brdcst = DAG.getMemIntrinsicNode(
              X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, MPI, Alignment,
              MachineMemOperand::MOLoad);
          return DAG.getBitcast(VT, Brdcst);
        }
        if (SplatBitSize > 64) {
          // Load the vector of constants and broadcast it.
          Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
                                             *Ctx);
          SDValue VCP = DAG.getConstantPool(VecC, PVT);
          unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
          MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm);
          Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
          SDVTList Tys = DAG.getVTList(VT, MVT::Other);
          SDValue Ops[] = {DAG.getEntryNode(), VCP};
          MachinePointerInfo MPI =
              MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
          return DAG.getMemIntrinsicNode(
              X86ISD::SUBV_BROADCAST_LOAD, dl, Tys, Ops, VVT, MPI, Alignment,
              MachineMemOperand::MOLoad);
        }
      }
    }

    // If we are moving a scalar into a vector (Ld must be set and all elements
    // but 1 are undef) and that operation is not obviously supported by
    // vmovd/vmovq/vmovss/vmovsd, then keep trying to form a broadcast.
    // That's better than general shuffling/memory ops. But if we are not
    // dealing with FP splats or conversions to integer, we can directly use
    // the current folding code.
    if (!Ld || NumElts - NumUndefElts != 1)
      return SDValue();
    unsigned ScalarSize = Ld.getValueSizeInBits();
    if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64)))
      return SDValue();
  }

  bool ConstSplatVal =
      (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
  bool IsLoad = ISD::isNormalLoad(Ld.getNode());

  // Make sure that all of the users of a non-constant load are from the
  // BUILD_VECTOR node.
  // FIXME: Is the use count needed for non-constant, non-load case?
  if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode()))
    return SDValue();

  unsigned ScalarSize = Ld.getValueSizeInBits();
  bool IsGE256 = (VT.getSizeInBits() >= 256);

  // When optimizing for size, generate up to 5 extra bytes for a broadcast
  // instruction to save 8 or more bytes of constant pool data.
  // TODO: If multiple splats are generated to load the same constant,
  // it may be detrimental to overall size. There needs to be a way to detect
  // that condition to know if this is truly a size win.
  bool OptForSize = DAG.shouldOptForSize();

  // Handle broadcasting a single constant scalar from the constant pool
  // into a vector.
  // On Sandybridge (no AVX2), it is still better to load a constant vector
  // from the constant pool and not to broadcast it from a scalar.
  // But override that restriction when optimizing for size.
  // TODO: Check if splatting is recommended for other AVX-capable CPUs.
  if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
    EVT CVT = Ld.getValueType();
    assert(!CVT.isVector() && "Must not broadcast a vector type");

    // Splat f16, f32, i32, v4f64, v4i64 in all cases with AVX2.
    // For size optimization, also splat v2f64 and v2i64, and for size opt
    // with AVX2, also splat i8 and i16.
    // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
    if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
        (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) ||
        (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
      const Constant *C = nullptr;
      if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
        C = CI->getConstantIntValue();
      else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
        C = CF->getConstantFPValue();

      assert(C && "Invalid constant type");

      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      SDValue CP =
          DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
      Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();

      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      SDValue Ops[] = {DAG.getEntryNode(), CP};
      MachinePointerInfo MPI =
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
      return DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT,
                                     MPI, Alignment, MachineMemOperand::MOLoad);
    }
  }

  // Handle AVX2 in-register broadcasts.
  if (!IsLoad && Subtarget.hasInt256() &&
      (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
    return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);

  // The scalar source must be a normal load.
  if (!IsLoad)
    return SDValue();

  // Make sure the non-chain result is only used by this build vector.
  if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0))
    return SDValue();

  if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
      (Subtarget.hasVLX() && ScalarSize == 64)) {
    auto *LN = cast<LoadSDNode>(Ld);
    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
    SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
    SDValue BCast =
        DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
                                LN->getMemoryVT(), LN->getMemOperand());
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
    return BCast;
  }

  // The integer check is needed for the 64-bit into 128-bit so it doesn't match
  // double since there is vbroadcastsd xmm
  if (Subtarget.hasInt256() && Ld.getValueType().isInteger() &&
      (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) {
    auto *LN = cast<LoadSDNode>(Ld);
    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
    SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
    SDValue BCast =
        DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
                                LN->getMemoryVT(), LN->getMemOperand());
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
    return BCast;
  }

  if (ScalarSize == 16 && Subtarget.hasFP16() && IsGE256)
    return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);

  // Unsupported broadcast.
  return SDValue();
}
9311 | |
9312 | |
9313 | |
9314 | |
9315 | |
9316 | |
9317 | static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, |
9318 | SDValue ExtIdx) { |
9319 | int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); |
9320 | if (!isa<ShuffleVectorSDNode>(ExtractedFromVec)) |
9321 | return Idx; |
9322 | |
9323 | |
9324 | |
9325 | |
9326 | |
9327 | |
9328 | |
9329 | |
9330 | |
9331 | |
9332 | |
9333 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec); |
9334 | SDValue ShuffleVec = SVOp->getOperand(0); |
9335 | MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); |
9336 | assert(ShuffleVecVT.getVectorElementType() == |
9337 | ExtractedFromVec.getSimpleValueType().getVectorElementType()); |
9338 | |
9339 | int ShuffleIdx = SVOp->getMaskElt(Idx); |
9340 | if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { |
9341 | ExtractedFromVec = ShuffleVec; |
9342 | return ShuffleIdx; |
9343 | } |
9344 | return Idx; |
9345 | } |
9346 | |
/// Try to lower a BUILD_VECTOR composed mostly of extract_vector_elt from at
/// most two source vectors as a shuffle of those sources, followed by at most
/// one INSERT_VECTOR_ELT for a single non-extract element.
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  // Skip if insert_vec_elt is not supported.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
    return SDValue();

  SDLoc DL(Op);
  unsigned NumElems = Op.getNumOperands();

  SDValue VecIn1;
  SDValue VecIn2;
  SmallVector<unsigned, 4> InsertIndices;
  SmallVector<int, 8> Mask(NumElems, -1);

  for (unsigned i = 0; i != NumElems; ++i) {
    unsigned Opc = Op.getOperand(i).getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    if (Opc != ISD::EXTRACT_VECTOR_ELT) {
      // Quit if more than 1 elements need inserting.
      if (InsertIndices.size() > 1)
        return SDValue();

      InsertIndices.push_back(i);
      continue;
    }

    SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
    SDValue ExtIdx = Op.getOperand(i).getOperand(1);

    // Quit if non-constant index.
    if (!isa<ConstantSDNode>(ExtIdx))
      return SDValue();
    int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);

    // Quit if extracted from vector of different type.
    if (ExtractedFromVec.getValueType() != VT)
      return SDValue();

    // Track the (at most two) distinct source vectors.
    if (!VecIn1.getNode())
      VecIn1 = ExtractedFromVec;
    else if (VecIn1 != ExtractedFromVec) {
      if (!VecIn2.getNode())
        VecIn2 = ExtractedFromVec;
      else if (VecIn2 != ExtractedFromVec)
        // Quit if more than 2 vectors to shuffle
        return SDValue();
    }

    // Second-source lanes are offset by NumElems in the shuffle mask.
    if (ExtractedFromVec == VecIn1)
      Mask[i] = Idx;
    else if (ExtractedFromVec == VecIn2)
      Mask[i] = Idx + NumElems;
  }

  if (!VecIn1.getNode())
    return SDValue();

  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
  SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);

  // Insert the remaining non-extract element(s) on top of the shuffle.
  for (unsigned Idx : InsertIndices)
    NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
                     DAG.getIntPtrConstant(Idx, DL));

  return NV;
}
9418 | |
9419 | |
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {

  MVT VT = Op.getSimpleValueType();
  assert((VT.getVectorElementType() == MVT::i1) &&
         "Unexpected type in LowerBUILD_VECTORvXi1!");

  SDLoc dl(Op);
  // All-zeros/all-ones vectors are handled elsewhere; return unchanged.
  if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
      ISD::isBuildVectorAllOnes(Op.getNode()))
    return Op;

  // Scan the operands: collect constant bits into 'Immediate', record
  // non-constant element positions, and detect whether all defined elements
  // splat the same value.
  uint64_t Immediate = 0;
  SmallVector<unsigned, 16> NonConstIdx;
  bool IsSplat = true;
  bool HasConstElts = false;
  int SplatIdx = -1;
  for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
    SDValue In = Op.getOperand(idx);
    if (In.isUndef())
      continue;
    if (auto *InC = dyn_cast<ConstantSDNode>(In)) {
      Immediate |= (InC->getZExtValue() & 0x1) << idx;
      HasConstElts = true;
    } else {
      NonConstIdx.push_back(idx);
    }
    if (SplatIdx < 0)
      SplatIdx = idx;
    else if (In != Op.getOperand(SplatIdx))
      IsSplat = false;
  }

  // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
  if (IsSplat) {
    // The build_vector allows the scalar element to be larger than the vector
    // element type. We need to mask it to use as a condition unless we know
    // the upper bits are zero.
    // FIXME: Use all ones mask for all element types?
    SDValue Cond = Op.getOperand(SplatIdx);
    assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");
    if (Cond.getOpcode() != ISD::SETCC)
      Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond,
                         DAG.getConstant(1, dl, MVT::i8));

    // Perform the select in the scalar domain so we can use cmov.
    if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
      // On 32-bit targets there is no i64 cmov: select a 32-bit mask and
      // concatenate two v32i1 halves.
      SDValue Select = DAG.getSelect(dl, MVT::i32, Cond,
                                     DAG.getAllOnesConstant(dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32));
      Select = DAG.getBitcast(MVT::v32i1, Select);
      return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Select, Select);
    } else {
      MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
      SDValue Select = DAG.getSelect(dl, ImmVT, Cond,
                                     DAG.getAllOnesConstant(dl, ImmVT),
                                     DAG.getConstant(0, dl, ImmVT));
      MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
      Select = DAG.getBitcast(VecVT, Select);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select,
                         DAG.getIntPtrConstant(0, dl));
    }
  }

  // insert elements one by one
  SDValue DstVec;
  if (HasConstElts) {
    // Materialize the constant bits first, then insert the variable elements.
    if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
      // 32-bit target: split the 64-bit immediate into two v32i1 halves.
      SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32);
      SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32);
      ImmL = DAG.getBitcast(MVT::v32i1, ImmL);
      ImmH = DAG.getBitcast(MVT::v32i1, ImmH);
      DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH);
    } else {
      MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
      SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
      MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
      DstVec = DAG.getBitcast(VecVT, Imm);
      DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec,
                           DAG.getIntPtrConstant(0, dl));
    }
  } else
    DstVec = DAG.getUNDEF(VT);

  // Insert the remaining non-constant elements.
  for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
    unsigned InsertIdx = NonConstIdx[i];
    DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
                         Op.getOperand(InsertIdx),
                         DAG.getIntPtrConstant(InsertIdx, dl));
  }
  return DstVec;
}
9512 | |
9513 | LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { |
9514 | switch (Opcode) { |
9515 | case X86ISD::PACKSS: |
9516 | case X86ISD::PACKUS: |
9517 | case X86ISD::FHADD: |
9518 | case X86ISD::FHSUB: |
9519 | case X86ISD::HADD: |
9520 | case X86ISD::HSUB: |
9521 | return true; |
9522 | } |
9523 | return false; |
9524 | } |
9525 | |
9526 | |
9527 | |
9528 | |
9529 | |
9530 | |
9531 | |
9532 | |
9533 | |
9534 | |
9535 | |
9536 | |
9537 | |
9538 | |
9539 | |
9540 | |
9541 | |
9542 | |
9543 | |
9544 | |
9545 | |
/// Check whether elements [BaseIdx, LastIdx) of build_vector \p N match one
/// "part" of a 256-bit horizontal binop with generic opcode \p Opcode
/// (e.g. ISD::ADD, ISD::FADD).
///
/// Each non-undef element in the range must have the form
///   Opcode (extract_vector_elt A, I), (extract_vector_elt A, I+1)
/// where I advances by 2 per element. Elements in the first half of the
/// scanned range must extract from one source vector (returned in \p V0),
/// elements in the second half from another (returned in \p V1); the
/// expected extract index restarts at \p BaseIdx when crossing into the
/// second half. For commutable opcodes (ADD/FADD) the two extracts feeding
/// an element may appear in either order.
///
/// Returns true and fills \p V0 / \p V1 on a successful match. Sources that
/// were never needed (all-undef half) remain UNDEF values of type VT.
static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
                                  SelectionDAG &DAG,
                                  unsigned BaseIdx, unsigned LastIdx,
                                  SDValue &V0, SDValue &V1) {
  EVT VT = N->getValueType(0);
  assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops");
  assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
  assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
         "Invalid Vector in input!");

  bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
  bool CanFold = true;
  unsigned ExpectedVExtractIdx = BaseIdx;
  unsigned NumElts = LastIdx - BaseIdx;
  V0 = DAG.getUNDEF(VT);
  V1 = DAG.getUNDEF(VT);

  // Check if N implements a horizontal binop.
  for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
    SDValue Op = N->getOperand(i + BaseIdx);

    // Skip UNDEFs, but keep the expected extract index in sync.
    if (Op->isUndef()) {
      // Update the expected vector extract index.
      if (i * 2 == NumElts)
        ExpectedVExtractIdx = BaseIdx;
      ExpectedVExtractIdx += 2;
      continue;
    }

    CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();

    if (!CanFold)
      break;

    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Try to match the following pattern:
    // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
    CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Op0.getOperand(0) == Op1.getOperand(0) &&
               isa<ConstantSDNode>(Op0.getOperand(1)) &&
               isa<ConstantSDNode>(Op1.getOperand(1)));
    if (!CanFold)
      break;

    unsigned I0 = Op0.getConstantOperandVal(1);
    unsigned I1 = Op1.getConstantOperandVal(1);

    if (i * 2 < NumElts) {
      // First half of the range: capture the source vector on first use.
      if (V0.isUndef()) {
        V0 = Op0.getOperand(0);
        if (V0.getValueType() != VT)
          return false;
      }
    } else {
      // Second half of the range: capture the (possibly different) source.
      if (V1.isUndef()) {
        V1 = Op0.getOperand(0);
        if (V1.getValueType() != VT)
          return false;
      }
      // Crossing into the second half restarts the expected extract index.
      if (i * 2 == NumElts)
        ExpectedVExtractIdx = BaseIdx;
    }

    SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
    if (I0 == ExpectedVExtractIdx)
      CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
    else if (IsCommutable && I1 == ExpectedVExtractIdx) {
      // Try to match the commuted dag sequence:
      // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
      CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
    } else
      CanFold = false;

    ExpectedVExtractIdx += 2;
  }

  return CanFold;
}
9628 | |
9629 | |
9630 | |
9631 | |
9632 | |
9633 | |
9634 | |
9635 | |
9636 | |
9637 | |
9638 | |
9639 | |
9640 | |
9641 | |
9642 | |
9643 | |
9644 | |
9645 | |
9646 | |
9647 | |
9648 | |
9649 | |
9650 | |
9651 | |
9652 | |
9653 | |
9654 | |
9655 | |
9656 | |
9657 | |
9658 | |
9659 | |
/// Emit a sequence of two 128-bit horizontal add/sub followed by
/// a concat_vector. Helper for LowerToHorizontalOp().
///
/// Expects two 256-bit vectors \p V0 and \p V1. Each is split into 128-bit
/// halves, which feed two horizontal binops of kind \p X86Opcode.
///
/// \p Mode selects how the halves are paired:
///   Mode == true:   LO = X86Opcode(V0_LO, V0_HI), HI = X86Opcode(V1_LO, V1_HI)
///   Mode == false:  LO = X86Opcode(V0_LO, V1_LO), HI = X86Opcode(V0_HI, V1_HI)
///
/// If \p isUndefLO is set, UNDEF is propagated to the lower 128 bits of the
/// result; likewise \p isUndefHI for the upper 128 bits. A half is also left
/// UNDEF when all of its inputs are UNDEF, to avoid emitting a useless op.
static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
                                     const SDLoc &DL, SelectionDAG &DAG,
                                     unsigned X86Opcode, bool Mode,
                                     bool isUndefLO, bool isUndefHI) {
  MVT VT = V0.getSimpleValueType();
  assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&
         "Invalid nodes in input!");

  unsigned NumElts = VT.getVectorNumElements();
  SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
  SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
  SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
  SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
  MVT NewVT = V0_LO.getSimpleValueType();

  SDValue LO = DAG.getUNDEF(NewVT);
  SDValue HI = DAG.getUNDEF(NewVT);

  if (Mode) {
    // Don't emit a horizontal binop if the result is expected to be UNDEF.
    if (!isUndefLO && !V0->isUndef())
      LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
    if (!isUndefHI && !V1->isUndef())
      HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
  } else {
    // Don't emit a horizontal binop if the result is expected to be UNDEF.
    if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
      LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);

    if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
      HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}
9695 | |
9696 | |
9697 | |
9698 | |
9699 | |
9700 | |
/// Returns true iff \p BV builds a vector whose result is equivalent to an
/// ADDSUB (even lanes subtract, odd lanes add) or SUBADD (even lanes add,
/// odd lanes subtract) of two vectors. On success the two input vectors are
/// returned in \p Opnd0 / \p Opnd1, \p NumExtracts is the number of
/// non-undef lanes matched, and \p IsSubAdd is true for the SUBADD form
/// (i.e. even lanes use FADD).
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
                             const X86Subtarget &Subtarget, SelectionDAG &DAG,
                             SDValue &Opnd0, SDValue &Opnd1,
                             unsigned &NumExtracts,
                             bool &IsSubAdd) {

  // ADDSUB requires SSE3 and only exists for FP vector types.
  MVT VT = BV->getSimpleValueType(0);
  if (!Subtarget.hasSSE3() || !VT.isFloatingPoint())
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  SDValue InVec0 = DAG.getUNDEF(VT);
  SDValue InVec1 = DAG.getUNDEF(VT);

  NumExtracts = 0;

  // Every element with a given parity (even/odd lane index) must use the
  // same opcode; Opc[0] tracks even lanes, Opc[1] tracks odd lanes. A zero
  // entry means "not seen yet".
  unsigned Opc[2] = {0, 0};
  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    SDValue Op = BV->getOperand(i);

    // Skip 'undef' values.
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::UNDEF)
      continue;

    // Early exit if we found an unexpected opcode.
    if (Opcode != ISD::FADD && Opcode != ISD::FSUB)
      return false;

    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Try to match the following pattern:
    // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
    // Early exit if we cannot match that sequence.
    if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op0.getOperand(1)) ||
        Op0.getOperand(1) != Op1.getOperand(1))
      return false;

    // Lane i of the result must come from lane i of the inputs.
    unsigned I0 = Op0.getConstantOperandVal(1);
    if (I0 != i)
      return false;

    // We found a valid add/sub node. Make sure its opcode agrees with any
    // previously-seen element of the same parity.
    if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode)
      return false;
    Opc[i % 2] = Opcode;

    // Capture the two source vectors the first time each is seen; they must
    // have the same type as the result.
    if (InVec0.isUndef()) {
      InVec0 = Op0.getOperand(0);
      if (InVec0.getSimpleValueType() != VT)
        return false;
    }
    if (InVec1.isUndef()) {
      InVec1 = Op1.getOperand(0);
      if (InVec1.getSimpleValueType() != VT)
        return false;
    }

    // Make sure that operands in input to each add/sub node always
    // come from the same pair of vectors.
    if (InVec0 != Op0.getOperand(0)) {
      // FSUB is not commutable, so an operand-order mismatch is fatal.
      if (Opcode == ISD::FSUB)
        return false;

      // FADD is commutable. Try to commute the operands and then test again.
      std::swap(Op0, Op1);
      if (InVec0 != Op0.getOperand(0))
        return false;
    }

    if (InVec1 != Op1.getOperand(0))
      return false;

    // Increment the number of extractions done.
    ++NumExtracts;
  }

  // Require: both parities seen, with two *different* opcodes (otherwise this
  // is a plain add/sub, not addsub/subadd), and both inputs defined.
  if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
      InVec0.isUndef() || InVec1.isUndef())
    return false;

  // Even lanes doing FADD means the odd lanes do FSUB => SUBADD form.
  IsSubAdd = Opc[0] == ISD::FADD;

  Opnd0 = InVec0;
  Opnd1 = InVec1;
  return true;
}
9801 | |
9802 | |
9803 | |
9804 | |
9805 | |
9806 | |
9807 | |
9808 | |
9809 | |
9810 | |
9811 | |
9812 | |
9813 | |
9814 | |
9815 | |
9816 | |
9817 | |
9818 | |
9819 | |
9820 | |
9821 | |
9822 | |
9823 | |
9824 | |
9825 | |
9826 | |
9827 | |
9828 | static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, |
9829 | SelectionDAG &DAG, |
9830 | SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, |
9831 | unsigned ExpectedUses) { |
9832 | if (Opnd0.getOpcode() != ISD::FMUL || |
9833 | !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) |
9834 | return false; |
9835 | |
9836 | |
9837 | |
9838 | |
9839 | |
9840 | const TargetOptions &Options = DAG.getTarget().Options; |
9841 | bool AllowFusion = |
9842 | (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); |
9843 | if (!AllowFusion) |
9844 | return false; |
9845 | |
9846 | Opnd2 = Opnd1; |
9847 | Opnd1 = Opnd0.getOperand(1); |
9848 | Opnd0 = Opnd0.getOperand(0); |
9849 | |
9850 | return true; |
9851 | } |
9852 | |
9853 | |
9854 | |
9855 | |
9856 | static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, |
9857 | const X86Subtarget &Subtarget, |
9858 | SelectionDAG &DAG) { |
9859 | SDValue Opnd0, Opnd1; |
9860 | unsigned NumExtracts; |
9861 | bool IsSubAdd; |
9862 | if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, |
9863 | IsSubAdd)) |
9864 | return SDValue(); |
9865 | |
9866 | MVT VT = BV->getSimpleValueType(0); |
9867 | SDLoc DL(BV); |
9868 | |
9869 | |
9870 | SDValue Opnd2; |
9871 | if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) { |
9872 | unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; |
9873 | return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); |
9874 | } |
9875 | |
9876 | |
9877 | if (IsSubAdd) |
9878 | return SDValue(); |
9879 | |
9880 | |
9881 | |
9882 | |
9883 | |
9884 | |
9885 | if (VT.is512BitVector()) |
9886 | return SDValue(); |
9887 | |
9888 | return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); |
9889 | } |
9890 | |
/// Returns true if \p BV matches a horizontal add/sub build_vector pattern.
/// On success, \p HOpcode is the corresponding X86ISD horizontal opcode and
/// \p V0 / \p V1 are the two source vectors (either may remain UNDEF if only
/// one half of each 128-bit chunk is populated).
static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
                             unsigned &HOpcode, SDValue &V0, SDValue &V1) {
  // Initialize outputs; ISD::DELETED_NODE marks "no opcode seen yet".
  MVT VT = BV->getSimpleValueType(0);
  HOpcode = ISD::DELETED_NODE;
  V0 = DAG.getUNDEF(VT);
  V1 = DAG.getUNDEF(VT);

  // x86 256-bit horizontal ops compute each 128-bit half of the result
  // independently from 128-bit halves of the inputs, so validate the index
  // arithmetic in 128-bit chunks.
  unsigned NumElts = VT.getVectorNumElements();
  unsigned GenericOpcode = ISD::DELETED_NODE;
  unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1;
  unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
  unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
  for (unsigned i = 0; i != Num128BitChunks; ++i) {
    for (unsigned j = 0; j != NumEltsIn128Bits; ++j) {
      // Ignore undef elements.
      SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);
      if (Op.isUndef())
        continue;

      // If there's an opcode mismatch, we're done.
      if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode)
        return false;

      // Initialize the horizontal opcode from the first non-undef element.
      if (HOpcode == ISD::DELETED_NODE) {
        GenericOpcode = Op.getOpcode();
        switch (GenericOpcode) {
        case ISD::ADD: HOpcode = X86ISD::HADD; break;
        case ISD::SUB: HOpcode = X86ISD::HSUB; break;
        case ISD::FADD: HOpcode = X86ISD::FHADD; break;
        case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
        default: return false;
        }
      }

      // Each element must be a binop of two constant-index extracts from the
      // same source vector, and the binop must have no other users.
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);
      if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          Op0.getOperand(0) != Op1.getOperand(0) ||
          !isa<ConstantSDNode>(Op0.getOperand(1)) ||
          !isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse())
        return false;

      // The source vector is chosen based on which 64-bit half of the
      // destination chunk is being calculated: V0 feeds the low half, V1 the
      // high half. Capture each on first use.
      if (j < NumEltsIn64Bits) {
        if (V0.isUndef())
          V0 = Op0.getOperand(0);
      } else {
        if (V1.isUndef())
          V1 = Op0.getOperand(0);
      }

      SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
      if (SourceVec != Op0.getOperand(0))
        return false;

      // Expect: op (extract_vector_elt A, I), (extract_vector_elt A, I+1).
      unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
      unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
      unsigned ExpectedIndex = i * NumEltsIn128Bits +
                               (j % NumEltsIn64Bits) * 2;
      if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
        continue;

      // If this is not a commutative op, the pair order cannot be swapped.
      if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD)
        return false;

      // Addition is commutative, so also accept:
      // op (extract_vector_elt A, I+1), (extract_vector_elt A, I).
      if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
        continue;

      // Extract indexes do not match the horizontal-op requirement.
      return false;
    }
  }
  // Matched: HOpcode/V0/V1 are returned by reference.
  return true;
}
9977 | |
9978 | static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, |
9979 | SelectionDAG &DAG, unsigned HOpcode, |
9980 | SDValue V0, SDValue V1) { |
9981 | |
9982 | |
9983 | |
9984 | MVT VT = BV->getSimpleValueType(0); |
9985 | unsigned Width = VT.getSizeInBits(); |
9986 | if (V0.getValueSizeInBits() > Width) |
9987 | V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width); |
9988 | else if (V0.getValueSizeInBits() < Width) |
9989 | V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width); |
9990 | |
9991 | if (V1.getValueSizeInBits() > Width) |
9992 | V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width); |
9993 | else if (V1.getValueSizeInBits() < Width) |
9994 | V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width); |
9995 | |
9996 | unsigned NumElts = VT.getVectorNumElements(); |
9997 | APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
9998 | for (unsigned i = 0; i != NumElts; ++i) |
9999 | if (BV->getOperand(i).isUndef()) |
10000 | DemandedElts.clearBit(i); |
10001 | |
10002 | |
10003 | unsigned HalfNumElts = NumElts / 2; |
10004 | if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) { |
10005 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
10006 | V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128); |
10007 | V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128); |
10008 | SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1); |
10009 | return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256); |
10010 | } |
10011 | |
10012 | return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1); |
10013 | } |
10014 | |
10015 | |
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
/// Returns an empty SDValue when no profitable horizontal-op lowering exists.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  // We need at least 2 non-undef elements to make this worthwhile.
  unsigned NumNonUndefs =
      count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
  if (NumNonUndefs < 2)
    return SDValue();

  // There are 4 sets of horizontal math operations distinguished by type:
  // int/FP at 128-bit/256-bit. Each type was introduced with a different
  // subtarget feature. Try the "native" single-instruction match first.
  MVT VT = BV->getSimpleValueType(0);
  if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
      ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
      ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
      ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
    unsigned HOpcode;
    SDValue V0, V1;
    if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
      return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
  }

  // The remaining strategies expand a 256-bit build_vector into a pair of
  // 128-bit horizontal ops plus a concat, so require AVX and 256-bit types.
  if (!Subtarget.hasAVX() || !VT.is256BitVector())
    return SDValue();

  // Count the number of UNDEF operands in each half of the build_vector.
  unsigned NumElts = VT.getVectorNumElements();
  unsigned Half = NumElts / 2;
  unsigned NumUndefsLO = 0;
  unsigned NumUndefsHI = 0;
  for (unsigned i = 0, e = Half; i != e; ++i)
    if (BV->getOperand(i)->isUndef())
      NumUndefsLO++;

  for (unsigned i = Half, e = NumElts; i != e; ++i)
    if (BV->getOperand(i)->isUndef())
      NumUndefsHI++;

  SDLoc DL(BV);
  SDValue InVec0, InVec1;
  if (VT == MVT::v8i32 || VT == MVT::v16i16) {
    SDValue InVec2, InVec3;
    unsigned X86Opcode;
    bool CanFold = true;

    // Match each 128-bit half as an independent integer horizontal add/sub;
    // halves that use a source must agree on which source they use.
    if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
        isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
                              InVec3) &&
        ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
        ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
      X86Opcode = X86ISD::HADD;
    else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
                                   InVec1) &&
             isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
                                   InVec3) &&
             ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
             ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
      X86Opcode = X86ISD::HSUB;
    else
      CanFold = false;

    if (CanFold) {
      // Do not try to expand this build_vector into a pair of horizontal
      // add/sub if we can emit a pair of scalar add/sub instead.
      if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
        return SDValue();

      // Convert this build_vector into a pair of horizontal binops followed
      // by a concat vector. Use whichever half actually captured a source.
      SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
      SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
      assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
      bool isUndefLO = NumUndefsLO == Half;
      bool isUndefHI = NumUndefsHI == Half;
      return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
                                   isUndefHI);
    }
  }

  // Fallback: match the whole vector as one horizontal-op part (Mode=true
  // pairing in ExpandHorizontalBinOp: each op combines halves of one source).
  if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
      VT == MVT::v16i16) {
    unsigned X86Opcode;
    if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
      X86Opcode = X86ISD::HADD;
    else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
                                   InVec1))
      X86Opcode = X86ISD::HSUB;
    else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
                                   InVec1))
      X86Opcode = X86ISD::FHADD;
    else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
                                   InVec1))
      X86Opcode = X86ISD::FHSUB;
    else
      return SDValue();

    // Don't try to expand this build_vector into a pair of horizontal add/sub
    // if we can simply emit a pair of scalar add/sub.
    if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
      return SDValue();

    // Convert this build_vector into two horizontal add/sub followed by
    // a concat vector.
    bool isUndefLO = NumUndefsLO == Half;
    bool isUndefHI = NumUndefsHI == Half;
    return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
                                 isUndefLO, isUndefHI);
  }

  return SDValue();
}
10130 | |
// Forward declaration: the full shift lowering is defined later in this file.
// lowerBuildVectorToBitOp (below) needs it to lower the shift node it builds.
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG);
10133 | |
10134 | |
10135 | |
10136 | |
10137 | |
10138 | |
10139 | |
/// If a BUILD_VECTOR's source elements all apply the same bit operation and
/// one of their operands is constant, lower to a pair of BUILD_VECTORs and
/// just apply the bit op to the vectors.
/// NOTE: It is not in our interest to build a general-purpose vectorizer from
/// this, but enough scalar bit operations beyond the already-matched patterns
/// get generated as a result of type legalization to make this worthwhile.
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  MVT VT = Op->getSimpleValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Check that all elements have the same opcode.
  // TODO: Should we allow UNDEFs and if so how many?
  unsigned Opcode = Op->getOperand(0).getOpcode();
  for (unsigned i = 1; i < NumElems; ++i)
    if (Opcode != Op->getOperand(i).getOpcode())
      return SDValue();

  // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
  bool IsShift = false;
  switch (Opcode) {
  default:
    return SDValue();
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    IsShift = true;
    break;
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    // Don't do this if the build_vector is a splat - we'd replace one
    // constant with an entire vector.
    if (Op->getSplatValue())
      return SDValue();
    if (!TLI.isOperationLegalOrPromote(Opcode, VT))
      return SDValue();
    break;
  }

  // Collect the per-element LHS/RHS operands into two vectors.
  SmallVector<SDValue, 4> LHSElts, RHSElts;
  for (SDValue Elt : Op->ops()) {
    SDValue LHS = Elt.getOperand(0);
    SDValue RHS = Elt.getOperand(1);

    // We expect the canonicalized RHS operand to be the constant.
    if (!isa<ConstantSDNode>(RHS))
      return SDValue();

    // The RHS scalar type may differ from the vector element type (e.g.
    // shift amounts); only shifts may be resized, via zext/trunc.
    if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
      if (!IsShift)
        return SDValue();
      RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
    }

    LHSElts.push_back(LHS);
    RHSElts.push_back(RHS);
  }

  // Only lower shifts with a uniform shift amount - a variable-amount vector
  // shift would not obviously be profitable here.
  if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
    return SDValue();

  SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
  SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
  SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);

  if (!IsShift)
    return Res;

  // Immediately lower the shift so the constant build vector doesn't get
  // converted to a constant pool entry before the shift is lowered.
  return LowerShift(Res, Subtarget, DAG);
}
10214 | |
10215 | |
10216 | |
10217 | |
10218 | static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, |
10219 | const X86Subtarget &Subtarget) { |
10220 | SDLoc DL(Op); |
10221 | MVT VT = Op.getSimpleValueType(); |
10222 | |
10223 | |
10224 | if (ISD::isBuildVectorAllZeros(Op.getNode())) |
10225 | return Op; |
10226 | |
10227 | |
10228 | |
10229 | |
10230 | if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { |
10231 | if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) |
10232 | return Op; |
10233 | |
10234 | return getOnesVector(VT, DAG, DL); |
10235 | } |
10236 | |
10237 | return SDValue(); |
10238 | } |
10239 | |
10240 | |
10241 | |
10242 | |
/// Look for opportunities to create a VPERMV/VPERMILPV/PSHUFB variable
/// permute from a vector of source values (\p SrcVec) and a vector of
/// extraction indices (\p IndicesVec). The vectors may be resized/bitcast to
/// match the type required by the chosen permute instruction. Returns an
/// empty SDValue if no suitable instruction exists for this type/subtarget.
static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
                                     SDLoc &DL, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  MVT ShuffleVT = VT;
  EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned SizeInBits = VT.getSizeInBits();

  // Adjust IndicesVec to match VT size.
  assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts &&
         "Illegal variable permute mask size");
  if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
    // Narrow/widen the indices vector to the destination bit width first.
    if (IndicesVec.getValueSizeInBits() > SizeInBits)
      IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
                                    NumElts * VT.getScalarSizeInBits());
    else if (IndicesVec.getValueSizeInBits() < SizeInBits)
      IndicesVec = widenSubVector(IndicesVec, false, Subtarget, DAG,
                                  SDLoc(IndicesVec), SizeInBits);

    // If still too many elements, zero-extend-in-reg to reduce the count.
    if (IndicesVec.getValueType().getVectorNumElements() > NumElts)
      IndicesVec = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(IndicesVec),
                               IndicesVT, IndicesVec);
  }
  IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);

  // Handle SrcVec that doesn't match the destination type size.
  if (SrcVec.getValueSizeInBits() != SizeInBits) {
    if ((SrcVec.getValueSizeInBits() % SizeInBits) == 0) {
      // SrcVec is wider: perform the permute at the wider type (indices are
      // zero-extended so they only address the low part), then extract.
      unsigned Scale = SrcVec.getValueSizeInBits() / SizeInBits;
      VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts);
      IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
      IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
                                  Subtarget, DAG, SDLoc(IndicesVec));
      SDValue NewSrcVec =
          createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
      if (NewSrcVec)
        return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
      return SDValue();
    } else if (SrcVec.getValueSizeInBits() < SizeInBits) {
      // SrcVec is narrower: widen with undef upper elements.
      SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
    } else
      return SDValue();
  }

  // Convert element indices of a wide type into byte/subelement indices of a
  // narrower shuffle type: each wide index is repeated Scale times, scaled by
  // Scale, and offset by its position within the wide element.
  auto ScaleIndices = [&DAG](SDValue Idx, uint64_t Scale) {
    assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale");
    EVT SrcVT = Idx.getValueType();
    unsigned NumDstBits = SrcVT.getScalarSizeInBits() / Scale;
    uint64_t IndexScale = 0;
    uint64_t IndexOffset = 0;

    // Build per-scalar multiplier/offset constants, e.g. for v4i32 -> v16i8
    // (Scale = 4):
    //   IndexScale  = splat(4 << 24 | 4 << 16 | 4 << 8 | 4)
    //   IndexOffset = splat(3 << 24 | 2 << 16 | 1 << 8 | 0)
    // so Idx * IndexScale + IndexOffset yields the 4 byte indices per lane.
    for (uint64_t i = 0; i != Scale; ++i) {
      IndexScale |= Scale << (i * NumDstBits);
      IndexOffset |= i << (i * NumDstBits);
    }

    Idx = DAG.getNode(ISD::MUL, SDLoc(Idx), SrcVT, Idx,
                      DAG.getConstant(IndexScale, SDLoc(Idx), SrcVT));
    Idx = DAG.getNode(ISD::ADD, SDLoc(Idx), SrcVT, Idx,
                      DAG.getConstant(IndexOffset, SDLoc(Idx), SrcVT));
    return Idx;
  };

  // Pick the best available variable-permute instruction per type. Cases that
  // need a custom multi-node expansion return directly; the rest set Opcode
  // (and possibly ShuffleVT) and fall through to the common emission below.
  unsigned Opcode = 0;
  switch (VT.SimpleTy) {
  default:
    break;
  case MVT::v16i8:
    if (Subtarget.hasSSSE3())
      Opcode = X86ISD::PSHUFB;
    break;
  case MVT::v8i16:
    if (Subtarget.hasVLX() && Subtarget.hasBWI())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasSSSE3()) {
      Opcode = X86ISD::PSHUFB;
      ShuffleVT = MVT::v16i8;
    }
    break;
  case MVT::v4f32:
  case MVT::v4i32:
    if (Subtarget.hasAVX()) {
      Opcode = X86ISD::VPERMILPV;
      ShuffleVT = MVT::v4f32;
    } else if (Subtarget.hasSSSE3()) {
      Opcode = X86ISD::PSHUFB;
      ShuffleVT = MVT::v16i8;
    }
    break;
  case MVT::v2f64:
  case MVT::v2i64:
    if (Subtarget.hasAVX()) {
      // Double the indices (Idx + Idx) so the lane-select bit lines up with
      // what VPERMILPD reads from each index element.
      IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
      Opcode = X86ISD::VPERMILPV;
      ShuffleVT = MVT::v2f64;
    } else if (Subtarget.hasSSE41()) {
      // With only 2 elements, a compare-against-zero + select between the
      // two splatted lanes implements the permute.
      return DAG.getSelectCC(
          DL, IndicesVec,
          getZeroVector(IndicesVT.getSimpleVT(), Subtarget, DAG, DL),
          DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}),
          DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}),
          ISD::CondCode::SETEQ);
    }
    break;
  case MVT::v32i8:
    if (Subtarget.hasVLX() && Subtarget.hasVBMI())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasXOP()) {
      // XOP's VPPERM can address both 128-bit source halves directly.
      SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL);
      SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL);
      SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL);
      SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL);
      return DAG.getNode(
          ISD::CONCAT_VECTORS, DL, VT,
          DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, LoIdx),
          DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, HiIdx));
    } else if (Subtarget.hasAVX()) {
      // AVX1: permute both halves with PSHUFB and select based on whether
      // each index addresses the low (<= 15) or high (> 15) half.
      SDValue Lo = extract128BitVector(SrcVec, 0, DAG, DL);
      SDValue Hi = extract128BitVector(SrcVec, 16, DAG, DL);
      SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo);
      SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi);
      auto PSHUFBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                              ArrayRef<SDValue> Ops) {
        // Permute Lo and Hi and then select based on index range.
        SDValue Idx = Ops[2];
        EVT VT = Idx.getValueType();
        return DAG.getSelectCC(DL, Idx, DAG.getConstant(15, DL, VT),
                               DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx),
                               DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx),
                               ISD::CondCode::SETGT);
      };
      SDValue Ops[] = {LoLo, HiHi, IndicesVec};
      return SplitOpsAndApply(DAG, Subtarget, DL, MVT::v32i8, Ops,
                              PSHUFBBuilder);
    }
    break;
  case MVT::v16i16:
    if (Subtarget.hasVLX() && Subtarget.hasBWI())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasAVX()) {
      // Scale to byte indices and reuse the v32i8 lowering.
      IndicesVec = ScaleIndices(IndicesVec, 2);
      return DAG.getBitcast(
          VT, createVariablePermute(
                  MVT::v32i8, DAG.getBitcast(MVT::v32i8, SrcVec),
                  DAG.getBitcast(MVT::v32i8, IndicesVec), DL, DAG, Subtarget));
    }
    break;
  case MVT::v8f32:
  case MVT::v8i32:
    if (Subtarget.hasAVX2())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasAVX()) {
      // AVX1 has no cross-lane 32-bit permute; build lane-duplicated copies
      // and select per element on whether its index is in the low (<= 3) or
      // high (> 3) half.
      SrcVec = DAG.getBitcast(MVT::v8f32, SrcVec);
      SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
                                          {0, 1, 2, 3, 0, 1, 2, 3});
      SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
                                          {4, 5, 6, 7, 4, 5, 6, 7});
      if (Subtarget.hasXOP())
        return DAG.getBitcast(
            VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,
                            IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
      // Permute both lane-duplicated vectors and select the right result.
      SDValue Res = DAG.getSelectCC(
          DL, IndicesVec, DAG.getConstant(3, DL, MVT::v8i32),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, HiHi, IndicesVec),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, LoLo, IndicesVec),
          ISD::CondCode::SETGT);
      return DAG.getBitcast(VT, Res);
    }
    break;
  case MVT::v4i64:
  case MVT::v4f64:
    if (Subtarget.hasAVX512()) {
      if (!Subtarget.hasVLX()) {
        // Without VLX, widen to 512 bits, permute there, then extract.
        MVT WidenSrcVT = MVT::getVectorVT(VT.getScalarType(), 8);
        SrcVec = widenSubVector(WidenSrcVT, SrcVec, false, Subtarget, DAG,
                                SDLoc(SrcVec));
        IndicesVec = widenSubVector(MVT::v8i64, IndicesVec, false, Subtarget,
                                    DAG, SDLoc(IndicesVec));
        SDValue Res = createVariablePermute(WidenSrcVT, SrcVec, IndicesVec, DL,
                                            DAG, Subtarget);
        return extract256BitVector(Res, 0, DAG, DL);
      }
      Opcode = X86ISD::VPERMV;
    } else if (Subtarget.hasAVX()) {
      // Same lane-duplicate + select trick as v8f32, on 64-bit elements.
      SrcVec = DAG.getBitcast(MVT::v4f64, SrcVec);
      SDValue LoLo =
          DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {0, 1, 0, 1});
      SDValue HiHi =
          DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {2, 3, 2, 3});
      // Double the indices (Idx + Idx) so the lane-select bit lines up with
      // what VPERMILPD reads from each index element.
      IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
      if (Subtarget.hasXOP())
        return DAG.getBitcast(
            VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,
                            IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
      // Permute both lane-duplicated vectors and select the right result.
      SDValue Res = DAG.getSelectCC(
          DL, IndicesVec, DAG.getConstant(2, DL, MVT::v4i64),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, HiHi, IndicesVec),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, LoLo, IndicesVec),
          ISD::CondCode::SETGT);
      return DAG.getBitcast(VT, Res);
    }
    break;
  case MVT::v64i8:
    if (Subtarget.hasVBMI())
      Opcode = X86ISD::VPERMV;
    break;
  case MVT::v32i16:
    if (Subtarget.hasBWI())
      Opcode = X86ISD::VPERMV;
    break;
  case MVT::v16f32:
  case MVT::v16i32:
  case MVT::v8f64:
  case MVT::v8i64:
    if (Subtarget.hasAVX512())
      Opcode = X86ISD::VPERMV;
    break;
  }
  if (!Opcode)
    return SDValue();

  assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) &&
         (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 &&
         "Illegal variable permute shuffle type");

  // If the shuffle runs at a narrower element type, scale indices to match.
  uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits();
  if (Scale > 1)
    IndicesVec = ScaleIndices(IndicesVec, Scale);

  EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger();
  IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec);

  // Emit the permute. NOTE: VPERMV takes (indices, source), the rest take
  // (source, indices).
  SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
  SDValue Res = Opcode == X86ISD::VPERMV
                    ? DAG.getNode(Opcode, DL, ShuffleVT, IndicesVec, SrcVec)
                    : DAG.getNode(Opcode, DL, ShuffleVT, SrcVec, IndicesVec);
  return DAG.getBitcast(VT, Res);
}
10499 | |
10500 | |
10501 | |
10502 | |
10503 | |
10504 | |
10505 | |
10506 | |
10507 | |
10508 | |
10509 | |
10510 | |
// Try to match a BUILD_VECTOR whose every lane has the form
//   extract_elt(SrcVec, extract_elt(IndicesVec, Idx))
// (the inner extract optionally zero/sign-extended), i.e. a variable
// permute of one source vector by one indices vector, and lower it via
// createVariablePermute. Returns SDValue() if the pattern does not match.
static SDValue
LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  SDValue SrcVec, IndicesVec;
  // Check that every operand extracts from the same source vector, using an
  // index that is itself extracted from lane Idx of the same indices vector.
  for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
    SDValue Op = V.getOperand(Idx);
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();

    // All extracted elements must come from one common source vector.
    if (!SrcVec)
      SrcVec = Op.getOperand(0);
    else if (SrcVec != Op.getOperand(0))
      return SDValue();
    SDValue ExtractedIndex = Op->getOperand(1);

    // Peek through any zero/sign extension of the extract index.
    if (ExtractedIndex.getOpcode() == ISD::ZERO_EXTEND ||
        ExtractedIndex.getOpcode() == ISD::SIGN_EXTEND)
      ExtractedIndex = ExtractedIndex.getOperand(0);
    if (ExtractedIndex.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();

    // All indices must come from one common indices vector.
    if (!IndicesVec)
      IndicesVec = ExtractedIndex.getOperand(0);
    else if (IndicesVec != ExtractedIndex.getOperand(0))
      return SDValue();

    // Result lane Idx must use lane Idx of the indices vector (constant idx).
    auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1));
    if (!PermIdx || PermIdx->getAPIntValue() != Idx)
      return SDValue();
  }

  SDLoc DL(V);
  MVT VT = V.getSimpleValueType();
  return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
}
10555 | |
// Custom lowering for BUILD_VECTOR: a long cascade of special cases ordered
// roughly from cheapest to most generic, ending in a pairwise shuffle merge.
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);

  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElems = Op.getNumOperands();

  // AVX512 mask vectors (vXi1) have a dedicated lowering path.
  if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
    return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);

  if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
    return VectorConstant;

  // Single pass over the operands, classifying each element as undef, zero
  // or non-zero, and tracking whether every defined element is a constant.
  unsigned EVTBits = EltVT.getSizeInBits();
  APInt UndefMask = APInt::getNullValue(NumElems);
  APInt ZeroMask = APInt::getNullValue(NumElems);
  APInt NonZeroMask = APInt::getNullValue(NumElems);
  bool IsAllConstants = true;
  SmallSet<SDValue, 8> Values; // Distinct non-undef element values.
  unsigned NumConstants = NumElems;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = Op.getOperand(i);
    if (Elt.isUndef()) {
      UndefMask.setBit(i);
      continue;
    }
    Values.insert(Elt);
    if (!isa<ConstantSDNode>(Elt) && !isa<ConstantFPSDNode>(Elt)) {
      IsAllConstants = false;
      NumConstants--;
    }
    if (X86::isZeroNode(Elt)) {
      ZeroMask.setBit(i);
    } else {
      NonZeroMask.setBit(i);
    }
  }

  // All elements undef or zero; zero-only vectors were handled by
  // materializeVectorConstant above, so only all-undef remains here.
  if (NonZeroMask == 0) {
    assert(UndefMask.isAllOnesValue() && "Fully undef mask expected");
    return DAG.getUNDEF(VT);
  }

  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());

  // If the upper elements (half, or 3/4 for 512-bit) are all zero/undef,
  // build only the lower part and widen with zeros/undef afterwards.
  if ((VT.is256BitVector() || VT.is512BitVector()) &&
      !isFoldableUseOfShuffle(BV)) {
    unsigned UpperElems = NumElems / 2;
    APInt UndefOrZeroMask = UndefMask | ZeroMask;
    unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countLeadingOnes();
    if (NumUpperUndefsOrZeros >= UpperElems) {
      if (VT.is512BitVector() &&
          NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))
        UpperElems = NumElems - (NumElems / 4);
      // Only pad with zeros when some upper element was an explicit zero.
      bool UndefUpper = UndefMask.countLeadingOnes() >= UpperElems;
      MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems);
      SDValue NewBV =
          DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems));
      return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl);
    }
  }

  // Try the special build_vector idioms before any generic expansion.
  if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
    return AddSub;
  if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
    return HorizontalOp;
  if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
    return Broadcast;
  if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
    return BitOp;

  unsigned NumZero = ZeroMask.countPopulation();
  unsigned NumNonZero = NonZeroMask.countPopulation();

  // Exactly one element is non-constant: load all the constants from the
  // constant pool and insert the variable element into the loaded vector.
  // Skipped for NumNonZero == 1, which has cheaper paths below.
  if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
      (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
       isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
    // Build an IR constant vector with undef at the variable lane.
    LLVMContext &Context = *DAG.getContext();
    Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context);
    SmallVector<Constant *, 16> ConstVecOps(NumElems, UndefValue::get(EltType));
    SDValue VarElt;   // The single non-constant element.
    SDValue InsIndex; // Its lane, as a vector index constant.
    for (unsigned i = 0; i != NumElems; ++i) {
      SDValue Elt = Op.getOperand(i);
      if (auto *C = dyn_cast<ConstantSDNode>(Elt))
        ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue());
      else if (auto *C = dyn_cast<ConstantFPSDNode>(Elt))
        ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF());
      else if (!Elt.isUndef()) {
        assert(!VarElt.getNode() && !InsIndex.getNode() &&
               "Expected one variable element in this vector");
        VarElt = Elt;
        InsIndex = DAG.getVectorIdxConstant(i, dl);
      }
    }
    Constant *CV = ConstantVector::get(ConstVecOps);
    SDValue DAGConstVec = DAG.getConstantPool(CV, VT);

    // Legalize the constant-pool address explicitly (this node is created
    // after normal legalization) and load the constants.
    SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
    MachineFunction &MF = DAG.getMachineFunction();
    MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
    SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
    unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
    unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
    // Inserts into the low 128 bits can use a plain INSERT_VECTOR_ELT.
    if (InsertC < NumEltsInLow128Bits)
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);

    // Upper-128-bit insertion: express it as a blend-style shuffle of the
    // loaded constants with SCALAR_TO_VECTOR(VarElt) (element 0 of V2).
    assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
    assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
    SmallVector<int, 8> ShuffleMask;
    unsigned NumElts = VT.getVectorNumElements();
    for (unsigned i = 0; i != NumElts; ++i)
      ShuffleMask.push_back(i == InsertC ? NumElts : i);
    SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
    return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
  }

  // Special case: exactly one non-zero, non-undef element.
  if (NumNonZero == 1) {
    unsigned Idx = NonZeroMask.countTrailingZeros();
    SDValue Item = Op.getOperand(Idx);

    if (Idx == 0) {
      // No zeros to preserve: a plain SCALAR_TO_VECTOR suffices (the other
      // lanes are undef).
      if (NumZero == 0)
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);

      // Element types with a direct scalar->vector move: insert into lane 0
      // and zero the remaining lanes via shuffle.
      if (EltVT == MVT::i32 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
          EltVT == MVT::f64 || (EltVT == MVT::i64 && Subtarget.is64Bit()) ||
          (EltVT == MVT::i16 && Subtarget.hasFP16())) {
        assert((VT.is128BitVector() || VT.is256BitVector() ||
                VT.is512BitVector()) &&
               "Expected an SSE value type!");
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
      }

      // i8/i16: zero-extend the scalar to i32, do the zeroing with i32
      // elements, then bitcast back to the requested type.
      if (EltVT == MVT::i16 || EltVT == MVT::i8) {
        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
        MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
        Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
        return DAG.getBitcast(VT, Item);
      }
    }

    // {0, x} for a 2-element vector: scalar_to_vector then shift the value
    // up by half the vector width via getVShift (presumably a whole-vector
    // left shift - see getVShift).
    if (NumElems == 2 && Idx == 1 &&
        X86::isZeroNode(Op.getOperand(0)) &&
        !X86::isZeroNode(Op.getOperand(1))) {
      unsigned NumBits = VT.getSizeInBits();
      return getVShift(true, VT,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                   VT, Op.getOperand(1)),
                       NumBits/2, DAG, *this, dl);
    }

    // A single non-zero constant among zeros: leave for later handling.
    if (IsAllConstants)
      return SDValue();

    // 32-bit elements at any index: scalar_to_vector plus a shuffle that
    // moves it into place (zeroing the rest if there were explicit zeros).
    if (EVTBits == 32) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
      return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
    }
  }

  // Splat of a single value: try to lower it as a splatted vector load.
  if (Values.size() == 1) {
    if (EVTBits == 32) {
      unsigned Idx = NonZeroMask.countTrailingZeros();
      SDValue Item = Op.getOperand(Idx);
      // Only if this build_vector is the sole user of the element.
      if (Op.getNode()->isOnlyUserOf(Item.getNode()))
        return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
    }
    return SDValue();
  }

  // Remaining all-constant vectors are handled elsewhere.
  if (IsAllConstants)
    return SDValue();

  // Can this be expressed as a variable permute of one source vector?
  if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget))
    return V;

  // Can the elements be merged into one (or a few) consecutive loads?
  {
    SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
    if (SDValue LD =
            EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
      return LD;
  }

  // AVX2, 32-bit elements, exactly two values alternating {a,b,a,b,...}:
  // build {a,b,undef,undef}, view the pair as one 64-bit element and
  // broadcast it across the vector.
  if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
    SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
                       DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
    auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) {
      // Every even lane must equal lane 0, every odd lane must equal lane 1.
      for (unsigned i = 2; i != NumElems; ++i)
        if (Ops[i % 2] != Op.getOperand(i))
          return false;
      return true;
    };
    if (CanSplat(Op, NumElems, Ops)) {
      MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64;
      MVT NarrowVT = MVT::getVectorVT(EltVT, 4);
      SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2),
                                     DAG.getBuildVector(NarrowVT, dl, Ops));
      MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2);
      return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT,
                                            NewBV));
    }
  }

  // 256/512-bit vectors: build each half separately and concatenate.
  if (VT.getSizeInBits() > 128) {
    MVT HVT = MVT::getVectorVT(EltVT, NumElems / 2);

    SDValue Lower =
        DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2));
    SDValue Upper = DAG.getBuildVector(
        HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2));

    return concatSubVectors(Lower, Upper, DAG, dl);
  }

  // From here on the vector is at most 128 bits wide.
  // 64-bit elements (v2i64/v2f64): only the single-non-zero case is handled.
  if (EVTBits == 64) {
    if (NumNonZero == 1) {
      unsigned Idx = NonZeroMask.countTrailingZeros();
      SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
                               Op.getOperand(Idx));
      return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
    }
    return SDValue();
  }

  // Narrow-element special lowerings.
  if (EVTBits == 8 && NumElems == 16)
    if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero,
                                          DAG, Subtarget))
      return V;

  if (EltVT == MVT::i16 && NumElems == 8)
    if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
                                          DAG, Subtarget))
      return V;

  if (EVTBits == 32 && NumElems == 4)
    if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget))
      return V;

  // 4-element vectors containing zeros: lift each element into its own
  // vector, combine adjacent pairs with MOVL/unpack, then shuffle the two
  // pair-results together.
  if (NumElems == 4 && NumZero > 0) {
    SmallVector<SDValue, 8> Ops(NumElems);
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !NonZeroMask[i];
      if (isZero)
        Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
      else
        Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
    }

    // Combine lanes {2i, 2i+1} into Ops[i], dispatching on which of the
    // pair's NonZeroMask bits are set.
    for (unsigned i = 0; i < 2; ++i) {
      switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
        default: llvm_unreachable("Unexpected NonZero count");
        case 0: // Both lanes zero: either operand works.
          Ops[i] = Ops[i*2];
          break;
        case 1: // Only the low lane of the pair is non-zero.
          Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
          break;
        case 2: // Only the high lane is non-zero (getMOVL operands swapped).
          Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
          break;
        case 3: // Both non-zero: interleave with unpack-low.
          Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
          break;
      }
    }

    // Pairs combined via the '2' case above used swapped getMOVL operands,
    // so the final shuffle picks those halves in reversed element order.
    bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
    bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
    int MaskVec[] = {
      Reverse1 ? 1 : 0,
      Reverse1 ? 0 : 1,
      static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
      static_cast<int>(Reverse2 ? NumElems : NumElems+1)
    };
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);
  }

  assert(Values.size() > 1 && "Expected non-undef and non-splat vector");

  // Is this mostly an existing shuffle with a few replaced elements?
  if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
    return Sh;

  // SSE4.1: materialize lane 0, then insert the rest one at a time.
  if (Subtarget.hasSSE41() && EltVT != MVT::f16) {
    SDValue Result;
    if (!Op.getOperand(0).isUndef())
      Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
    else
      Result = DAG.getUNDEF(VT);

    for (unsigned i = 1; i < NumElems; ++i) {
      if (Op.getOperand(i).isUndef()) continue;
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
                           Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
    }
    return Result;
  }

  // Generic fallback: put each element into a vector of its own...
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    if (!Op.getOperand(i).isUndef())
      Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
    else
      Ops[i] = DAG.getUNDEF(VT);
  }

  // ...then merge them pairwise over log2(NumElems) rounds of shuffles that
  // concatenate the low Scale elements of each input, until all elements end
  // up in Ops[0].
  for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
    SmallVector<int, 16> Mask;
    for(unsigned i = 0; i != Scale; ++i)
      Mask.push_back(i);
    for (unsigned i = 0; i != Scale; ++i)
      Mask.push_back(NumElems+i);
    Mask.append(NumElems - Mask.size(), SM_SentinelUndef);

    for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
      Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
  }
  return Ops[0];
}
10947 | |
10948 | |
10949 | |
10950 | |
// Lower a 256-/512-bit CONCAT_VECTORS of non-mask subvectors by inserting
// the non-zero operands into a zero or undef base vector.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
  SDLoc dl(Op);
  MVT ResVT = Op.getSimpleValueType();

  assert((ResVT.is256BitVector() ||
          ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");

  // Classify the operands: undef subvectors are ignored, all-zero ones
  // counted, and the positions of the remaining (possibly non-zero) ones
  // recorded in the NonZeros bitmask.
  unsigned NumOperands = Op.getNumOperands();
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  for (unsigned i = 0; i != NumOperands; ++i) {
    SDValue SubVec = Op.getOperand(i);
    if (SubVec.isUndef())
      continue;
    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
      ++NumZero;
    else {
      assert(i < sizeof(NonZeros) * CHAR_BIT); // Keep the shift defined.
      NonZeros |= 1 << i;
      ++NumNonZero;
    }
  }

  // More than two non-zero subvectors: build the result out of two
  // half-width concats (each lowered recursively).
  if (NumNonZero > 2) {
    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
    ArrayRef<SDUse> Ops = Op->ops();
    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
                             Ops.slice(0, NumOperands/2));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
                             Ops.slice(NumOperands/2));
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
  }

  // Otherwise insert the (at most two) non-zero subvectors into a base
  // vector: zero if any operand was all-zero, undef otherwise.
  SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
                        : DAG.getUNDEF(ResVT);

  MVT SubVT = Op.getOperand(0).getSimpleValueType();
  unsigned NumSubElems = SubVT.getVectorNumElements();
  for (unsigned i = 0; i != NumOperands; ++i) {
    if ((NonZeros & (1 << i)) == 0)
      continue;

    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec,
                      Op.getOperand(i),
                      DAG.getIntPtrConstant(i * NumSubElems, dl));
  }

  return Vec;
}
11004 | |
11005 | |
11006 | |
11007 | |
11008 | |
// Lower CONCAT_VECTORS of AVX512 mask vectors (vXi1).
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG & DAG) {
  SDLoc dl(Op);
  MVT ResVT = Op.getSimpleValueType();
  unsigned NumOperands = Op.getNumOperands();

  assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
         "Unexpected number of operands in CONCAT_VECTORS");

  // Record which operands are known all-zero and which are (possibly)
  // non-zero; undef operands end up in neither set.
  uint64_t Zeros = 0;
  uint64_t NonZeros = 0;
  for (unsigned i = 0; i != NumOperands; ++i) {
    SDValue SubVec = Op.getOperand(i);
    if (SubVec.isUndef())
      continue;
    assert(i < sizeof(NonZeros) * CHAR_BIT); // Keep the shifts defined.
    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
      Zeros |= (uint64_t)1 << i;
    else
      NonZeros |= (uint64_t)1 << i;
  }

  unsigned NumElems = ResVT.getVectorNumElements();

  // A single non-zero subvector that sits above every all-zero subvector
  // (NonZeros > Zeros as integers) but is not the last operand: insert it at
  // bit 0 and use a mask shift (KSHIFTL) to move it into place; the shift
  // zeroes the low bits for free.
  if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
      Log2_64(NonZeros) != NumOperands - 1) {
    // Widen the shift type for small masks (v8i1 shifts presumably need
    // DQI, and sub-v8i1 types get widened - TODO confirm against KSHIFT
    // legality rules).
    MVT ShiftVT = ResVT;
    if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
      ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
    unsigned Idx = Log2_64(NonZeros);
    SDValue SubVec = Op.getOperand(Idx);
    unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT,
                         DAG.getUNDEF(ShiftVT), SubVec,
                         DAG.getIntPtrConstant(0, dl));
    Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
                     DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
    // Shrink back to the requested type if we widened for the shift.
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
                       DAG.getIntPtrConstant(0, dl));
  }

  // At most one (possibly) non-zero operand: insert it into a zero or undef
  // base vector at its final position.
  if (NonZeros == 0 || isPowerOf2_64(NonZeros)) {
    SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT);
    if (!NonZeros)
      return Vec;
    unsigned Idx = Log2_64(NonZeros);
    SDValue SubVec = Op.getOperand(Idx);
    unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
    return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec,
                       DAG.getIntPtrConstant(Idx * SubVecNumElts, dl));
  }

  // More than two operands with multiple non-zeros: split into two
  // half-width concats handled recursively.
  if (NumOperands > 2) {
    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
    ArrayRef<SDUse> Ops = Op->ops();
    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
                             Ops.slice(0, NumOperands/2));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
                             Ops.slice(NumOperands/2));
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
  }

  // Only the two-operand, two-non-zero case remains.
  assert(countPopulation(NonZeros) == 2 && "Simple cases not handled?");

  // Wide masks are left untouched here (handled elsewhere/later).
  if (ResVT.getVectorNumElements() >= 16)
    return Op;

  // Otherwise expand into two subvector insertions over undef.
  SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
                            DAG.getUNDEF(ResVT), Op.getOperand(0),
                            DAG.getIntPtrConstant(0, dl));
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1),
                     DAG.getIntPtrConstant(NumElems/2, dl));
}
11087 | |
11088 | static SDValue LowerCONCAT_VECTORS(SDValue Op, |
11089 | const X86Subtarget &Subtarget, |
11090 | SelectionDAG &DAG) { |
11091 | MVT VT = Op.getSimpleValueType(); |
11092 | if (VT.getVectorElementType() == MVT::i1) |
11093 | return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG); |
11094 | |
11095 | assert((VT.is256BitVector() && Op.getNumOperands() == 2) || |
11096 | (VT.is512BitVector() && (Op.getNumOperands() == 2 || |
11097 | Op.getNumOperands() == 4))); |
11098 | |
11099 | |
11100 | |
11101 | |
11102 | |
11103 | return LowerAVXCONCAT_VECTORS(Op, DAG, Subtarget); |
11104 | } |
11105 | |
11106 | |
11107 | |
11108 | |
11109 | |
11110 | |
11111 | |
11112 | |
11113 | |
11114 | |
11115 | |
11116 | |
11117 | |
11118 | |
11119 | |
11120 | |
11121 | |
11122 | |
11123 | |
11124 | static bool isNoopShuffleMask(ArrayRef<int> Mask) { |
11125 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
11126 | assert(Mask[i] >= -1 && "Out of bound mask element!"); |
11127 | if (Mask[i] >= 0 && Mask[i] != i) |
11128 | return false; |
11129 | } |
11130 | return true; |
11131 | } |
11132 | |
11133 | |
11134 | |
11135 | |
11136 | |
11137 | |
11138 | static bool isLaneCrossingShuffleMask(unsigned LaneSizeInBits, |
11139 | unsigned ScalarSizeInBits, |
11140 | ArrayRef<int> Mask) { |
11141 | assert(LaneSizeInBits && ScalarSizeInBits && |
11142 | (LaneSizeInBits % ScalarSizeInBits) == 0 && |
11143 | "Illegal shuffle lane size"); |
11144 | int LaneSize = LaneSizeInBits / ScalarSizeInBits; |
11145 | int Size = Mask.size(); |
11146 | for (int i = 0; i < Size; ++i) |
11147 | if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) |
11148 | return true; |
11149 | return false; |
11150 | } |
11151 | |
11152 | |
11153 | |
// Convenience wrapper: does this mask move any element across a 128-bit
// lane boundary of VT?
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
  return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);
}
11157 | |
11158 | |
11159 | |
11160 | |
11161 | static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits, |
11162 | unsigned ScalarSizeInBits, |
11163 | ArrayRef<int> Mask) { |
11164 | assert(LaneSizeInBits && ScalarSizeInBits && |
11165 | (LaneSizeInBits % ScalarSizeInBits) == 0 && |
11166 | "Illegal shuffle lane size"); |
11167 | int NumElts = Mask.size(); |
11168 | int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits; |
11169 | int NumLanes = NumElts / NumEltsPerLane; |
11170 | if (NumLanes > 1) { |
11171 | for (int i = 0; i != NumLanes; ++i) { |
11172 | int SrcLane = -1; |
11173 | for (int j = 0; j != NumEltsPerLane; ++j) { |
11174 | int M = Mask[(i * NumEltsPerLane) + j]; |
11175 | if (M < 0) |
11176 | continue; |
11177 | int Lane = (M % NumElts) / NumEltsPerLane; |
11178 | if (SrcLane >= 0 && SrcLane != Lane) |
11179 | return true; |
11180 | SrcLane = Lane; |
11181 | } |
11182 | } |
11183 | } |
11184 | return false; |
11185 | } |
11186 | |
11187 | |
11188 | |
11189 | |
11190 | |
11191 | |
11192 | |
11193 | |
11194 | |
11195 | |
11196 | |
11197 | |
11198 | static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, |
11199 | ArrayRef<int> Mask, |
11200 | SmallVectorImpl<int> &RepeatedMask) { |
11201 | auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); |
11202 | RepeatedMask.assign(LaneSize, -1); |
11203 | int Size = Mask.size(); |
11204 | for (int i = 0; i < Size; ++i) { |
11205 | assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0); |
11206 | if (Mask[i] < 0) |
11207 | continue; |
11208 | if ((Mask[i] % Size) / LaneSize != i / LaneSize) |
11209 | |
11210 | return false; |
11211 | |
11212 | |
11213 | |
11214 | int LocalM = Mask[i] < Size ? Mask[i] % LaneSize |
11215 | : Mask[i] % LaneSize + LaneSize; |
11216 | if (RepeatedMask[i % LaneSize] < 0) |
11217 | |
11218 | RepeatedMask[i % LaneSize] = LocalM; |
11219 | else if (RepeatedMask[i % LaneSize] != LocalM) |
11220 | |
11221 | return false; |
11222 | } |
11223 | return true; |
11224 | } |
11225 | |
11226 | |
// Test whether the mask repeats per 128-bit lane; on success the per-lane
// mask is returned in \p RepeatedMask.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
11232 | |
// As above, but for callers that only need the yes/no answer and not the
// repeated per-lane mask.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
  SmallVector<int, 32> RepeatedMask;
  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
11238 | |
11239 | |
// Test whether the mask repeats per 256-bit lane (for 512-bit vectors); on
// success the per-lane mask is returned in \p RepeatedMask.
static bool
is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
}
11245 | |
11246 | |
11247 | |
11248 | static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, |
11249 | unsigned EltSizeInBits, |
11250 | ArrayRef<int> Mask, |
11251 | SmallVectorImpl<int> &RepeatedMask) { |
11252 | int LaneSize = LaneSizeInBits / EltSizeInBits; |
11253 | RepeatedMask.assign(LaneSize, SM_SentinelUndef); |
11254 | int Size = Mask.size(); |
11255 | for (int i = 0; i < Size; ++i) { |
11256 | assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0)); |
11257 | if (Mask[i] == SM_SentinelUndef) |
11258 | continue; |
11259 | if (Mask[i] == SM_SentinelZero) { |
11260 | if (!isUndefOrZero(RepeatedMask[i % LaneSize])) |
11261 | return false; |
11262 | RepeatedMask[i % LaneSize] = SM_SentinelZero; |
11263 | continue; |
11264 | } |
11265 | if ((Mask[i] % Size) / LaneSize != i / LaneSize) |
11266 | |
11267 | return false; |
11268 | |
11269 | |
11270 | |
11271 | int LaneM = Mask[i] / Size; |
11272 | int LocalM = (Mask[i] % LaneSize) + (LaneM * LaneSize); |
11273 | if (RepeatedMask[i % LaneSize] == SM_SentinelUndef) |
11274 | |
11275 | RepeatedMask[i % LaneSize] = LocalM; |
11276 | else if (RepeatedMask[i % LaneSize] != LocalM) |
11277 | |
11278 | return false; |
11279 | } |
11280 | return true; |
11281 | } |
11282 | |
11283 | |
11284 | |
// Overload taking the element size from \p VT's scalar type.
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                        ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedTargetShuffleMask(LaneSizeInBits, VT.getScalarSizeInBits(),
                                     Mask, RepeatedMask);
}
11291 | |
11292 | |
11293 | |
// Returns true if element \p Idx of \p Op is known to hold the same value
// as element \p ExpectedIdx of \p ExpectedOp, based purely on how the nodes
// were constructed (no value computation is performed).
static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
                                int Idx, int ExpectedIdx) {
  assert(0 <= Idx && Idx < MaskSize && 0 <= ExpectedIdx &&
         ExpectedIdx < MaskSize && "Out of range element index");
  // Both nodes must exist and share an opcode to compare element provenance.
  if (!Op || !ExpectedOp || Op.getOpcode() != ExpectedOp.getOpcode())
    return false;

  switch (Op.getOpcode()) {
  case ISD::BUILD_VECTOR:
    // Two BUILD_VECTOR elements match if their scalar operands are the same
    // node. Only valid while the mask width still matches the operand count.
    if (MaskSize == (int)Op.getNumOperands() &&
        MaskSize == (int)ExpectedOp.getNumOperands())
      return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx);
    break;
  case X86ISD::VBROADCAST:
  case X86ISD::VBROADCAST_LOAD:
    // A broadcast repeats a single value in every lane, so any two lanes of
    // the same broadcast node match (when the element count agrees).
    return (Op == ExpectedOp &&
            (int)Op.getValueType().getVectorNumElements() == MaskSize);
  case X86ISD::HADD:
  case X86ISD::HSUB:
  case X86ISD::FHADD:
  case X86ISD::FHSUB:
  case X86ISD::PACKSS:
  case X86ISD::PACKUS:
    // When both inputs of a horizontal/pack op are the same vector, each
    // 128-bit lane of the result consists of two identical halves. Two
    // elements therefore match if they are in the same 128-bit lane and at
    // the same offset within their half-lane.
    if (Op == ExpectedOp && Op.getOperand(0) == Op.getOperand(1)) {
      MVT VT = Op.getSimpleValueType();
      int NumElts = VT.getVectorNumElements();
      if (MaskSize == NumElts) {
        int NumLanes = VT.getSizeInBits() / 128;
        int NumEltsPerLane = NumElts / NumLanes;
        int NumHalfEltsPerLane = NumEltsPerLane / 2;
        bool SameLane =
            (Idx / NumEltsPerLane) == (ExpectedIdx / NumEltsPerLane);
        bool SameElt =
            (Idx % NumHalfEltsPerLane) == (ExpectedIdx % NumHalfEltsPerLane);
        return SameLane && SameElt;
      }
    }
    break;
  }

  return false;
}
11343 | |
11344 | |
11345 | |
11346 | |
11347 | |
11348 | |
11349 | |
11350 | |
11351 | |
11352 | |
11353 | |
11354 | static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask, |
11355 | SDValue V1 = SDValue(), |
11356 | SDValue V2 = SDValue()) { |
11357 | int Size = Mask.size(); |
11358 | if (Size != (int)ExpectedMask.size()) |
11359 | return false; |
11360 | |
11361 | for (int i = 0; i < Size; ++i) { |
11362 | assert(Mask[i] >= -1 && "Out of bound mask element!"); |
11363 | int MaskIdx = Mask[i]; |
11364 | int ExpectedIdx = ExpectedMask[i]; |
11365 | if (0 <= MaskIdx && MaskIdx != ExpectedIdx) { |
11366 | SDValue MaskV = MaskIdx < Size ? V1 : V2; |
11367 | SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; |
11368 | MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); |
11369 | ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); |
11370 | if (!IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx)) |
11371 | return false; |
11372 | } |
11373 | } |
11374 | return true; |
11375 | } |
11376 | |
11377 | |
11378 | |
11379 | |
11380 | |
11381 | |
11382 | |
11383 | |
11384 | |
11385 | |
// Checks whether a target shuffle mask is equivalent to an expected mask.
// Unlike isShuffleEquivalent, the candidate mask may also contain
// SM_SentinelZero entries. Undef entries always match; mismatching real
// indices are still accepted if IsElementEquivalent proves the referenced
// elements of V1/V2 are identical.
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
                                      ArrayRef<int> ExpectedMask,
                                      SDValue V1 = SDValue(),
                                      SDValue V2 = SDValue()) {
  int Size = Mask.size();
  if (Size != (int)ExpectedMask.size())
    return false;
  assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
         "Illegal target shuffle mask");

  // Reject candidate masks with out-of-range (non-sentinel) entries.
  if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
    return false;

  // Only consult the shuffle sources for element-equivalence checks when
  // their width matches the shuffle type.
  if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits())
    V1 = SDValue();
  if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
    V2 = SDValue();

  for (int i = 0; i < Size; ++i) {
    int MaskIdx = Mask[i];
    int ExpectedIdx = ExpectedMask[i];
    // Undef always matches, as does an exact index (or sentinel) match.
    if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
      continue;
    // Differing real indices: accept if the two referenced elements are
    // provably the same value.
    if (0 <= MaskIdx && 0 <= ExpectedIdx) {
      SDValue MaskV = MaskIdx < Size ? V1 : V2;
      SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
      MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
      ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
      if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
        continue;
    }
    // Anything else (including zero-sentinel mismatches) fails.
    return false;
  }
  return true;
}
11424 | |
11425 | |
// Build the shuffle mask equivalent to a VSELECT with the given constant
// condition: lane i selects from the first operand (index i) when the
// condition element is non-zero, and from the second operand (index
// i + NumElts) when it is zero or undef. Returns false if the condition
// cannot be resolved to per-element constant bits.
static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
                                         SDValue Cond) {
  EVT CondVT = Cond.getValueType();
  unsigned EltSizeInBits = CondVT.getScalarSizeInBits();
  unsigned NumElts = CondVT.getVectorNumElements();

  // Extract the condition as per-element constant bits. (The two trailing
  // bools presumably control undef/zero element handling - see
  // getTargetConstantBitsFromNode; TODO confirm.)
  APInt UndefElts;
  SmallVector<APInt, 32> EltBits;
  if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits,
                                     true, false))
    return false;

  Mask.resize(NumElts, SM_SentinelUndef);

  for (int i = 0; i != (int)NumElts; ++i) {
    Mask[i] = i;
    // Arbitrarily take from the second operand for undef condition lanes
    // (undef lets us choose either side); zero lanes must take from it.
    if (UndefElts[i] || EltBits[i].isNullValue())
      Mask[i] += NumElts;
  }

  return true;
}
11451 | |
11452 | |
11453 | |
11454 | static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { |
11455 | if (VT != MVT::v8i32 && VT != MVT::v8f32) |
11456 | return false; |
11457 | |
11458 | SmallVector<int, 8> Unpcklwd; |
11459 | createUnpackShuffleMask(MVT::v8i16, Unpcklwd, true, |
11460 | false); |
11461 | SmallVector<int, 8> Unpckhwd; |
11462 | createUnpackShuffleMask(MVT::v8i16, Unpckhwd, false, |
11463 | false); |
11464 | bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) || |
11465 | isTargetShuffleEquivalent(VT, Mask, Unpckhwd)); |
11466 | return IsUnpackwdMask; |
11467 | } |
11468 | |
11469 | static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) { |
11470 | |
11471 | MVT EltVT = MVT::getIntegerVT(128 / Mask.size()); |
11472 | MVT VT = MVT::getVectorVT(EltVT, Mask.size()); |
11473 | |
11474 | |
11475 | SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end()); |
11476 | ShuffleVectorSDNode::commuteMask(CommutedMask); |
11477 | |
11478 | |
11479 | for (unsigned i = 0; i != 4; ++i) { |
11480 | SmallVector<int, 16> UnpackMask; |
11481 | createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2); |
11482 | if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) || |
11483 | isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask)) |
11484 | return true; |
11485 | } |
11486 | return false; |
11487 | } |
11488 | |
11489 | |
11490 | |
11491 | |
11492 | |
11493 | static bool hasIdenticalHalvesShuffleMask(ArrayRef<int> Mask) { |
11494 | assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask"); |
11495 | unsigned HalfSize = Mask.size() / 2; |
11496 | for (unsigned i = 0; i != HalfSize; ++i) { |
11497 | if (Mask[i] != Mask[i + HalfSize]) |
11498 | return false; |
11499 | } |
11500 | return true; |
11501 | } |
11502 | |
11503 | |
11504 | |
11505 | |
11506 | |
11507 | |
11508 | |
11509 | |
11510 | |
11511 | static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) { |
11512 | assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); |
11513 | assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); |
11514 | assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!"); |
11515 | assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!"); |
11516 | assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!"); |
11517 | |
11518 | |
11519 | |
11520 | int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin(); |
11521 | assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask"); |
11522 | |
11523 | int FirstElt = Mask[FirstIndex]; |
11524 | if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; })) |
11525 | return (FirstElt << 6) | (FirstElt << 4) | (FirstElt << 2) | FirstElt; |
11526 | |
11527 | unsigned Imm = 0; |
11528 | Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0; |
11529 | Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2; |
11530 | Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4; |
11531 | Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6; |
11532 | return Imm; |
11533 | } |
11534 | |
11535 | static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL, |
11536 | SelectionDAG &DAG) { |
11537 | return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); |
11538 | } |
11539 | |
11540 | |
11541 | |
11542 | |
11543 | |
11544 | |
11545 | |
11546 | |
// Returns true if the non-zeroable elements of Mask form a contiguous,
// strictly-increasing run starting either at element 0 or at element
// NumElements (i.e. the first element of the second operand).
// IsZeroSideLeft reports whether the zeros precede the run (the run does
// not start at index 0).
static bool isNonZeroElementsInOrder(const APInt &Zeroable,
                                     ArrayRef<int> Mask, const EVT &VectorType,
                                     bool &IsZeroSideLeft) {
  int NextElement = -1;
  // Check if the Mask's non-zero elements are in ascending order.
  for (int i = 0, e = Mask.size(); i < e; i++) {
    // Undef mask elements are not accepted.
    assert(Mask[i] >= -1 && "Out of bound mask element!");
    if (Mask[i] < 0)
      return false;
    if (Zeroable[i])
      continue;
    // Seed from the lowest non-zero element: the run must begin at either
    // index 0 or index NumElements.
    if (NextElement < 0) {
      NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
      IsZeroSideLeft = NextElement != 0;
    }
    // Bail if the non-zero elements are not consecutive and increasing.
    if (NextElement != Mask[i])
      return false;
    NextElement++;
  }
  return true;
}
11571 | |
11572 | |
// Try to lower a shuffle with a single PSHUFB of V1 or V2. Fails (returns
// an empty SDValue) if the shuffle would need both inputs or would cross a
// 128-bit lane, neither of which PSHUFB can do.
static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
                                      ArrayRef<int> Mask, SDValue V1,
                                      SDValue V2, const APInt &Zeroable,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  int Size = Mask.size();
  int LaneSize = 128 / VT.getScalarSizeInBits();
  const int NumBytes = VT.getSizeInBits() / 8;
  const int NumEltBytes = VT.getScalarSizeInBits() / 8;

  assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
         (Subtarget.hasAVX2() && VT.is256BitVector()) ||
         (Subtarget.hasBWI() && VT.is512BitVector()));

  SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
  // A byte mask with the sign bit set (0x80) zeroes that result byte.
  SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);

  // Build the byte-level control mask one result byte at a time.
  SDValue V;
  for (int i = 0; i < NumBytes; ++i) {
    int M = Mask[i / NumEltBytes];
    if (M < 0) {
      PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
      continue;
    }
    if (Zeroable[i / NumEltBytes]) {
      PSHUFBMask[i] = ZeroMask;
      continue;
    }

    // Only a single input of V1 or V2 can be used.
    SDValue SrcV = (M >= Size ? V2 : V1);
    if (V && V != SrcV)
      return SDValue();
    V = SrcV;
    M %= Size;

    // PSHUFB can't cross lanes, ensure this doesn't happen.
    if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
      return SDValue();

    // Convert the element index to a byte index within its lane.
    M = M % LaneSize;
    M = M * NumEltBytes + (i % NumEltBytes);
    PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
  }
  assert(V && "Failed to find a source input");

  // Emit the PSHUFB on an i8 vector and bitcast back to the shuffle type.
  MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
  return DAG.getBitcast(
      VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
                      DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}
11625 | |
11626 | static SDValue getMaskNode(SDValue Mask, MVT MaskVT, |
11627 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
11628 | const SDLoc &dl); |
11629 | |
11630 | |
// Lower a shuffle whose non-zero elements form an in-order run (see
// isNonZeroElementsInOrder) as an X86ISD::EXPAND of the appropriate source
// over a zero vector, masked by the non-zeroable lanes.
static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
                                    const APInt &Zeroable,
                                    ArrayRef<int> Mask, SDValue &V1,
                                    SDValue &V2, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  bool IsLeftZeroSide = true;
  if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
                                IsLeftZeroSide))
    return SDValue();
  // The expand mask selects the lanes that receive source elements.
  unsigned VEXPANDMask = (~Zeroable).getZExtValue();
  // Mask constants need at least an i8 integer type.
  MVT IntegerType =
      MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
  SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
  unsigned NumElts = VT.getVectorNumElements();
  assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
         "Unexpected number of vector elements");
  SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
                              Subtarget, DAG, DL);
  SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
  // If the zeros are on the left, the run must come from the second operand.
  SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
  return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);
}
11653 | |
// Try to match a target shuffle mask as an UNPCKL/UNPCKH, setting
// UnpackOpcode and (possibly) rewriting V1/V2. Handles direct matches,
// unary unpacks against zero, and commuted binary matches.
static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
                                  unsigned &UnpackOpcode, bool IsUnary,
                                  ArrayRef<int> TargetMask, const SDLoc &DL,
                                  SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  int NumElts = VT.getVectorNumElements();

  // Determine whether the even (from V1) and odd (from V2) interleave slots
  // are entirely undef and/or zeroable.
  bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
  for (int i = 0; i != NumElts; i += 2) {
    int M1 = TargetMask[i + 0];
    int M2 = TargetMask[i + 1];
    Undef1 &= (SM_SentinelUndef == M1);
    Undef2 &= (SM_SentinelUndef == M2);
    Zero1 &= isUndefOrZero(M1);
    Zero2 &= isUndefOrZero(M2);
  }
  assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
         "Zeroable shuffle detected");

  // Attempt to match the target mask against the unpack lo/hi mask patterns.
  SmallVector<int, 64> Unpckl, Unpckh;
  createUnpackShuffleMask(VT, Unpckl, true, IsUnary);
  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1,
                                (IsUnary ? V1 : V2))) {
    UnpackOpcode = X86ISD::UNPCKL;
    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
    return true;
  }

  createUnpackShuffleMask(VT, Unpckh, false, IsUnary);
  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1,
                                (IsUnary ? V1 : V2))) {
    UnpackOpcode = X86ISD::UNPCKH;
    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
    return true;
  }

  // If a unary shuffle with zeroable interleave slots, try matching as an
  // unpack with a zero vector operand.
  if (IsUnary && (Zero1 || Zero2)) {
    // Don't bother if we can blend instead (SSE41, or cheap v2i64/v2f64
    // low-half moves) and the mask is just a zero-extend of the low half.
    if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
        isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
      return false;

    bool MatchLo = true, MatchHi = true;
    for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
      int M = TargetMask[i];

      // Ignore the slots that are known zeroable or undef.
      if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
          (M == SM_SentinelUndef))
        continue;

      MatchLo &= (M == Unpckl[i]);
      MatchHi &= (M == Unpckh[i]);
    }

    if (MatchLo || MatchHi) {
      UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
      V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
      V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
      return true;
    }
  }

  // If a binary shuffle, commute and try again.
  if (!IsUnary) {
    ShuffleVectorSDNode::commuteMask(Unpckl);
    if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
      UnpackOpcode = X86ISD::UNPCKL;
      std::swap(V1, V2);
      return true;
    }

    ShuffleVectorSDNode::commuteMask(Unpckh);
    if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
      UnpackOpcode = X86ISD::UNPCKH;
      std::swap(V1, V2);
      return true;
    }
  }

  return false;
}
11740 | |
11741 | |
11742 | |
11743 | static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT, |
11744 | ArrayRef<int> Mask, SDValue V1, SDValue V2, |
11745 | SelectionDAG &DAG) { |
11746 | SmallVector<int, 8> Unpckl; |
11747 | createUnpackShuffleMask(VT, Unpckl, true, false); |
11748 | if (isShuffleEquivalent(Mask, Unpckl, V1, V2)) |
11749 | return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); |
11750 | |
11751 | SmallVector<int, 8> Unpckh; |
11752 | createUnpackShuffleMask(VT, Unpckh, false, false); |
11753 | if (isShuffleEquivalent(Mask, Unpckh, V1, V2)) |
11754 | return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); |
11755 | |
11756 | |
11757 | ShuffleVectorSDNode::commuteMask(Unpckl); |
11758 | if (isShuffleEquivalent(Mask, Unpckl, V1, V2)) |
11759 | return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); |
11760 | |
11761 | ShuffleVectorSDNode::commuteMask(Unpckh); |
11762 | if (isShuffleEquivalent(Mask, Unpckh, V1, V2)) |
11763 | return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); |
11764 | |
11765 | return SDValue(); |
11766 | } |
11767 | |
11768 | |
11769 | |
// Try to lower a 256-bit "splat2" shuffle as a 64-bit chunk permute of V1
// followed by a self-unpack. This produces a whole-vector ("natural")
// unpack rather than AVX's per-128-bit-lane behaviour.
static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
                                        ArrayRef<int> Mask, SDValue V1,
                                        SDValue V2, SelectionDAG &DAG) {
  SmallVector<int, 32> Unpckl, Unpckh;
  createSplat2ShuffleMask(VT, Unpckl, true);
  createSplat2ShuffleMask(VT, Unpckh, false);

  unsigned UnpackOpcode;
  if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
    UnpackOpcode = X86ISD::UNPCKL;
  else if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
    UnpackOpcode = X86ISD::UNPCKH;
  else
    return SDValue();

  // Rearrange the 64-bit chunks of V1 ({0,2,1,3}) so the subsequent
  // per-lane unpack of the result with itself yields the splat2 pattern.
  V1 = DAG.getVectorShuffle(MVT::v4f64, DL, DAG.getBitcast(MVT::v4f64, V1),
                            DAG.getUNDEF(MVT::v4f64), {0, 2, 1, 3});
  V1 = DAG.getBitcast(VT, V1);
  return DAG.getNode(UnpackOpcode, DL, VT, V1, V1);
}
11793 | |
11794 | |
11795 | |
// Check whether the shuffle mask is a truncation pattern (strided low
// elements with zeroable uppers), computing the source and destination
// truncation vector types on success.
static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
                                 ArrayRef<int> Mask, const APInt &Zeroable,
                                 const X86Subtarget &Subtarget) {
  // Truncation of sub-512-bit types requires AVX512VL.
  if (!VT.is512BitVector() && !Subtarget.hasVLX())
    return false;

  unsigned NumElts = Mask.size();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned MaxScale = 64 / EltSizeInBits;

  // Try each power-of-2 truncation scale in turn.
  for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
    unsigned SrcEltBits = EltSizeInBits * Scale;
    // Sub-32-bit source elements require AVX512BW.
    if (SrcEltBits < 32 && !Subtarget.hasBWI())
      continue;
    unsigned NumSrcElts = NumElts / Scale;
    // The low elements must be the strided sequence 0, Scale, 2*Scale, ...
    if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale))
      continue;
    // All remaining upper elements must be zeroable.
    unsigned UpperElts = NumElts - NumSrcElts;
    if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
      continue;
    SrcVT = MVT::getIntegerVT(EltSizeInBits * Scale);
    SrcVT = MVT::getVectorVT(SrcVT, NumSrcElts);
    DstVT = MVT::getIntegerVT(EltSizeInBits);
    if ((NumSrcElts * EltSizeInBits) >= 128) {
      // The truncation result is itself at least 128 bits wide.
      DstVT = MVT::getVectorVT(DstVT, NumSrcElts);
    } else {
      // Widen the destination element count to a full 128-bit vector.
      DstVT = MVT::getVectorVT(DstVT, 128 / EltSizeInBits);
    }
    return true;
  }

  return false;
}
11831 | |
11832 | |
11833 | |
// Helper to create TRUNCATE/VTRUNC nodes for AVX512 truncation lowering,
// extracting or widening (with zero or undef uppers, per ZeroUppers) as
// needed to reach the requested destination type.
static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG, bool ZeroUppers) {
  MVT SrcVT = Src.getSimpleValueType();
  MVT DstSVT = DstVT.getScalarType();
  unsigned NumDstElts = DstVT.getVectorNumElements();
  unsigned NumSrcElts = SrcVT.getVectorNumElements();
  unsigned DstEltSizeInBits = DstVT.getScalarSizeInBits();

  if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
    return SDValue();

  // Same element count: a plain ISD::TRUNCATE suffices.
  if (NumSrcElts == NumDstElts)
    return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);

  // More source elements than needed: truncate all, then extract the low
  // subvector of the result.
  if (NumSrcElts > NumDstElts) {
    MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
    return extractSubVector(Trunc, 0, DAG, DL, DstVT.getSizeInBits());
  }

  // Fewer source elements, but the truncated result is still >= 128 bits:
  // truncate, then widen (zeroing or leaving undef the upper elements).
  if ((NumSrcElts * DstEltSizeInBits) >= 128) {
    MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
    return widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
                          DstVT.getSizeInBits());
  }

  // Non-VLX targets must truncate from a 512-bit type, so we need to widen,
  // truncate and then possibly extract the original subvector.
  if (!Subtarget.hasVLX() && !SrcVT.is512BitVector()) {
    SDValue NewSrc = widenSubVector(Src, ZeroUppers, Subtarget, DAG, DL, 512);
    return getAVX512TruncNode(DL, DstVT, NewSrc, Subtarget, DAG, ZeroUppers);
  }

  // Fallback to a X86ISD::VTRUNC, padding (with undef/zero) the uppers.
  MVT TruncVT = MVT::getVectorVT(DstSVT, 128 / DstEltSizeInBits);
  SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Src);
  if (DstVT != TruncVT)
    Trunc = widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
                           DstVT.getSizeInBits());
  return Trunc;
}
11878 | |
11879 | |
11880 | |
11881 | |
11882 | |
11883 | |
11884 | |
11885 | |
11886 | |
11887 | |
11888 | |
11889 | |
11890 | |
11891 | |
11892 | |
11893 | |
11894 | |
// Try to fold a shuffle of an existing TRUNCATE's low elements (with
// zeroable/undef uppers) into an AVX512 truncation node.
static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
                                     SDValue V2, ArrayRef<int> Mask,
                                     const APInt &Zeroable,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");
  if (!Subtarget.hasAVX512())
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned MaxScale = 64 / EltSizeInBits;
  for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
    // Low elements must be the strided sequence 0, Scale, ... and all upper
    // elements must be zeroable.
    unsigned NumSrcElts = NumElts / Scale;
    unsigned UpperElts = NumElts - NumSrcElts;
    if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
        !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
      continue;

    SDValue Src = V1;
    if (!Src.hasOneUse())
      return SDValue();

    // V1 must itself be a (bitcast of a) TRUNCATE with matching source
    // element width for the fold to apply.
    Src = peekThroughOneUseBitcasts(Src);
    if (Src.getOpcode() != ISD::TRUNCATE ||
        Src.getScalarValueSizeInBits() != (EltSizeInBits * Scale))
      return SDValue();
    Src = Src.getOperand(0);

    // VPMOVWB is only available with avx512bw.
    MVT SrcVT = Src.getSimpleValueType();
    if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
        !Subtarget.hasBWI())
      return SDValue();

    // Zero the uppers unless they were explicitly undef in the mask.
    bool UndefUppers = isUndefInRange(Mask, NumSrcElts, UpperElts);
    return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
  }

  return SDValue();
}
11936 | |
11937 | |
// Attempt to match binary shuffle patterns as a truncate of the
// concatenation of both sources.
static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
                                    SDValue V2, ArrayRef<int> Mask,
                                    const APInt &Zeroable,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unexpected VTRUNC type");
  if (!Subtarget.hasAVX512())
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned MaxScale = 64 / EltSizeInBits;
  for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
    // Sub-32-bit source elements require AVX512BW.
    unsigned SrcEltBits = EltSizeInBits * Scale;
    if (SrcEltBits < 32 && !Subtarget.hasBWI())
      continue;

    // Match shuffle <0,Scale,2*Scale,...> spanning both halves, and bail if
    // the V2-sourced half of the pattern is entirely undef (then this isn't
    // really a binary truncate).
    unsigned NumHalfSrcElts = NumElts / Scale;
    unsigned NumSrcElts = 2 * NumHalfSrcElts;
    if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
        isUndefInRange(Mask, NumHalfSrcElts, NumHalfSrcElts))
      continue;

    // The elements beyond the truncation must be undef/zero.
    unsigned UpperElts = NumElts - NumSrcElts;
    if (UpperElts > 0 &&
        !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
      continue;
    bool UndefUppers =
        UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);

    // As we're using both sources then we need to concat them together
    // and truncate from the double-sized src.
    MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
    SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);

    MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
    MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
    Src = DAG.getBitcast(SrcVT, Src);
    return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
  }

  return SDValue();
}
11986 | |
11987 | |
11988 | |
11989 | |
11990 | |
11991 | |
11992 | |
11993 | |
11994 | |
11995 | |
11996 | |
11997 | |
11998 | |
11999 | |
12000 | |
12001 | |
12002 | |
12003 | |
12004 | |
12005 | |
12006 | |
12007 | |
// Check whether a compaction lowering can be done by dropping even elements
// and compute how many times the even elements must be dropped.
// Returns N (1..3) such that the mask matches selecting every 2^N-th
// element, or 0 if no power-of-2 stride is viable.
static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
                                          bool IsSingleInput) {
  // The modulus for the shuffle vector entries is based on whether this is
  // a single input or not.
  int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
  assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
         "We should only be called with masks with a power-of-2 size!");

  uint64_t ModMask = (uint64_t)ShuffleModulus - 1;

  // Track viability for each candidate stride 2^1, 2^2 and 2^3
  // simultaneously.
  bool ViableForN[3] = {true, true, true};

  for (int i = 0, e = Mask.size(); i < e; ++i) {
    // Ignore undef lanes, we'll optimistically collapse them to the pattern
    // we want.
    if (Mask[i] < 0)
      continue;

    bool IsAnyViable = false;
    for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
      if (ViableForN[j]) {
        uint64_t N = j + 1;

        // The shuffle mask must be equal to (i * 2^N) % M.
        if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
          IsAnyViable = true;
        else
          ViableForN[j] = false;
      }
    // Early exit if all strides have been ruled out.
    if (!IsAnyViable)
      break;
  }

  // Return the smallest viable stride exponent.
  for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
    if (ViableForN[j])
      return j + 1;

  // Return 0 as there is no viable power of two.
  return 0;
}
12052 | |
12053 | |
12054 | |
12055 | |
12056 | |
// Try to match a shuffle as a (possibly multi-stage) PACKSS/PACKUS
// compaction, setting the pack opcode, the wider source type SrcVT and the
// (peeked-through) pack operands V1/V2.
static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
                                 unsigned &PackOpcode, ArrayRef<int> TargetMask,
                                 const SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget,
                                 unsigned MaxStages = 1) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned BitSize = VT.getScalarSizeInBits();
  assert(0 < MaxStages && MaxStages <= 3 && (BitSize << MaxStages) <= 64 &&
         "Illegal maximum compaction");

  // Checks whether N1/N2 can be packed down from PackVT's element width:
  // PACKUS when the discarded upper bits are known zero, PACKSS when they
  // are known sign bits.
  auto MatchPACK = [&](SDValue N1, SDValue N2, MVT PackVT) {
    unsigned NumSrcBits = PackVT.getScalarSizeInBits();
    unsigned NumPackedBits = NumSrcBits - BitSize;
    N1 = peekThroughBitcasts(N1);
    N2 = peekThroughBitcasts(N2);
    unsigned NumBits1 = N1.getScalarValueSizeInBits();
    unsigned NumBits2 = N2.getScalarValueSizeInBits();
    bool IsZero1 = llvm::isNullOrNullSplat(N1, false);
    bool IsZero2 = llvm::isNullOrNullSplat(N2, false);
    // Operands must have the expected source element width (or be
    // undef/zero, which pack to anything).
    if ((!N1.isUndef() && !IsZero1 && NumBits1 != NumSrcBits) ||
        (!N2.isUndef() && !IsZero2 && NumBits2 != NumSrcBits))
      return false;
    // PACKUS (PACKUSDW needs SSE41; PACKUSWB, i.e. BitSize == 8, is SSE2).
    if (Subtarget.hasSSE41() || BitSize == 8) {
      APInt ZeroMask = APInt::getHighBitsSet(NumSrcBits, NumPackedBits);
      if ((N1.isUndef() || IsZero1 || DAG.MaskedValueIsZero(N1, ZeroMask)) &&
          (N2.isUndef() || IsZero2 || DAG.MaskedValueIsZero(N2, ZeroMask))) {
        V1 = N1;
        V2 = N2;
        SrcVT = PackVT;
        PackOpcode = X86ISD::PACKUS;
        return true;
      }
    }
    // PACKSS: discarded bits must all be copies of the sign bit.
    bool IsAllOnes1 = llvm::isAllOnesOrAllOnesSplat(N1, false);
    bool IsAllOnes2 = llvm::isAllOnesOrAllOnesSplat(N2, false);
    if ((N1.isUndef() || IsZero1 || IsAllOnes1 ||
         DAG.ComputeNumSignBits(N1) > NumPackedBits) &&
        (N2.isUndef() || IsZero2 || IsAllOnes2 ||
         DAG.ComputeNumSignBits(N2) > NumPackedBits)) {
      V1 = N1;
      V2 = N2;
      SrcVT = PackVT;
      PackOpcode = X86ISD::PACKSS;
      return true;
    }
    return false;
  };

  // Attempt to match against wider and wider compaction patterns.
  for (unsigned NumStages = 1; NumStages <= MaxStages; ++NumStages) {
    MVT PackSVT = MVT::getIntegerVT(BitSize << NumStages);
    MVT PackVT = MVT::getVectorVT(PackSVT, NumElts >> NumStages);

    // Try binary shuffle.
    SmallVector<int, 32> BinaryMask;
    createPackShuffleMask(VT, BinaryMask, false, NumStages);
    if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2))
      if (MatchPACK(V1, V2, PackVT))
        return true;

    // Try unary shuffle.
    SmallVector<int, 32> UnaryMask;
    createPackShuffleMask(VT, UnaryMask, true, NumStages);
    if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1))
      if (MatchPACK(V1, V1, PackVT))
        return true;
  }

  return false;
}
12127 | |
// Lower a shuffle matched by matchShuffleWithPACK as one or more
// PACKSS/PACKUS stages.
static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
                                    SDValue V1, SDValue V2, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT PackVT;
  unsigned PackOpcode;
  unsigned SizeBits = VT.getSizeInBits();
  unsigned EltBits = VT.getScalarSizeInBits();
  unsigned MaxStages = Log2_32(64 / EltBits);
  if (!matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
                            Subtarget, MaxStages))
    return SDValue();

  unsigned CurrentEltBits = PackVT.getScalarSizeInBits();
  unsigned NumStages = Log2_32(CurrentEltBits / EltBits);

  // Don't lower multi-stage packs on AVX512, prefer truncation.
  if (NumStages != 1 && SizeBits == 128 && Subtarget.hasVLX())
    return SDValue();

  // Pack to the largest type possible:
  // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.
  unsigned MaxPackBits = 16;
  if (CurrentEltBits > 16 &&
      (PackOpcode == X86ISD::PACKSS || Subtarget.hasSSE41()))
    MaxPackBits = 32;

  // Repeatedly pack down to the target element size.
  SDValue Res;
  for (unsigned i = 0; i != NumStages; ++i) {
    unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits);
    unsigned NumSrcElts = SizeBits / SrcEltBits;
    MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
    MVT DstSVT = MVT::getIntegerVT(SrcEltBits / 2);
    MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
    MVT DstVT = MVT::getVectorVT(DstSVT, NumSrcElts * 2);
    Res = DAG.getNode(PackOpcode, DL, DstVT, DAG.getBitcast(SrcVT, V1),
                      DAG.getBitcast(SrcVT, V2));
    V1 = V2 = Res;
    CurrentEltBits /= 2;
  }
  assert(Res && Res.getValueType() == VT &&
         "Failed to lower compaction shuffle");
  return Res;
}
12172 | |
12173 | |
12174 | |
12175 | |
12176 | |
// Try to lower the shuffle as a bit mask (AND with a constant vector):
// every lane must either be zeroable or be the identity element of a single
// source operand.
static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
                                     SDValue V2, ArrayRef<int> Mask,
                                     const APInt &Zeroable,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  MVT MaskVT = VT;
  MVT EltVT = VT.getVectorElementType();
  SDValue Zero, AllOnes;
  // Use f64 mask elements where i64 constants are not natively available.
  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
    EltVT = MVT::f64;
    MaskVT = MVT::getVectorVT(EltVT, Mask.size());
  }

  MVT LogicVT = VT;
  if (EltVT == MVT::f32 || EltVT == MVT::f64) {
    Zero = DAG.getConstantFP(0.0, DL, EltVT);
    APFloat AllOnesValue = APFloat::getAllOnesValue(
        SelectionDAG::EVTToAPFloatSemantics(EltVT), EltVT.getSizeInBits());
    AllOnes = DAG.getConstantFP(AllOnesValue, DL, EltVT);
    // Perform the AND in a same-width integer vector type.
    LogicVT =
        MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
  } else {
    Zero = DAG.getConstant(0, DL, EltVT);
    AllOnes = DAG.getAllOnesConstant(DL, EltVT);
  }

  SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
  SDValue V;
  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
    if (Zeroable[i])
      continue;
    // Non-zeroable lanes must be identity elements of a single source.
    if (Mask[i] % Size != i)
      return SDValue();
    if (!V)
      V = Mask[i] < Size ? V1 : V2;
    else if (V != (Mask[i] < Size ? V1 : V2))
      return SDValue();

    VMaskOps[i] = AllOnes;
  }
  // Bail if there were no non-zeroable elements.
  if (!V)
    return SDValue();

  SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
  VMask = DAG.getBitcast(LogicVT, VMask);
  V = DAG.getBitcast(LogicVT, V);
  SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
  return DAG.getBitcast(VT, And);
}
12227 | |
12228 | |
12229 | |
12230 | |
12231 | |
12232 | |
12233 | static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, |
12234 | SDValue V2, ArrayRef<int> Mask, |
12235 | SelectionDAG &DAG) { |
12236 | assert(VT.isInteger() && "Only supports integer vector types!"); |
12237 | MVT EltVT = VT.getVectorElementType(); |
12238 | SDValue Zero = DAG.getConstant(0, DL, EltVT); |
12239 | SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); |
12240 | SmallVector<SDValue, 16> MaskOps; |
12241 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
12242 | if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size) |
12243 | return SDValue(); |
12244 | MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero); |
12245 | } |
12246 | |
12247 | SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps); |
12248 | V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); |
12249 | V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2); |
12250 | return DAG.getNode(ISD::OR, DL, VT, V1, V2); |
12251 | } |
12252 | |
12253 | static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, |
12254 | SDValue PreservedSrc, |
12255 | const X86Subtarget &Subtarget, |
12256 | SelectionDAG &DAG); |
12257 | |
// Try to match the shuffle as a per-lane blend of V1 and V2, producing a
// bitmask of the lanes taken from V2. Zeroable lanes may be satisfied by
// forcing a known zero/undef operand to zero (ForceV1Zero/ForceV2Zero).
// On success Mask is canonicalized in place to identity indices.
static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
                                MutableArrayRef<int> Mask,
                                const APInt &Zeroable, bool &ForceV1Zero,
                                bool &ForceV2Zero, uint64_t &BlendMask) {
  bool V1IsZeroOrUndef =
      V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZeroOrUndef =
      V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());

  BlendMask = 0;
  ForceV1Zero = false, ForceV2Zero = false;
  assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");

  // Attempt to generate the binary blend mask. If an element is zeroable
  // then we can blend in either direction using a zeroed operand.
  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
    int M = Mask[i];
    if (M == SM_SentinelUndef)
      continue;
    // Lane comes from V1 (directly, or via an equivalent V1 element).
    if (M == i ||
        (0 <= M && M < Size && IsElementEquivalent(Size, V1, V1, M, i))) {
      Mask[i] = i;
      continue;
    }
    // Lane comes from V2 (directly, or via an equivalent V2 element).
    if (M == (i + Size) ||
        (Size <= M && IsElementEquivalent(Size, V2, V2, M - Size, i))) {
      BlendMask |= 1ull << i;
      Mask[i] = i + Size;
      continue;
    }
    // Zeroable lane: take it from whichever side is known zero/undef.
    if (Zeroable[i]) {
      if (V1IsZeroOrUndef) {
        ForceV1Zero = true;
        Mask[i] = i;
        continue;
      }
      if (V2IsZeroOrUndef) {
        ForceV2Zero = true;
        BlendMask |= 1ull << i;
        Mask[i] = i + Size;
        continue;
      }
    }
    // Lane can't be blended from either side.
    return false;
  }
  return true;
}
12305 | |
// Widens a per-element blend mask so each original bit covers Scale bits:
// bit i of BlendMask becomes Scale consecutive set bits at position i*Scale.
static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
                                            int Scale) {
  const uint64_t ScaleBits = (1ull << Scale) - 1;
  uint64_t ScaledMask = 0;
  for (int Elt = 0; Elt != Size; ++Elt)
    if ((BlendMask >> Elt) & 1)
      ScaledMask |= ScaleBits << (Elt * Scale);
  return ScaledMask;
}
12314 | |
12315 | |
12316 | |
12317 | |
12318 | |
12319 | |
12320 | |
/// Try to emit a blend instruction for a shuffle.
///
/// Matches the mask as a blend (possibly after forcing a zeroable input to an
/// all-zeros vector) and then lowers per vector type: an immediate BLENDI
/// where available, a repeated-lane PBLENDW for v16i16, bit-mask / masked-move
/// forms, or a byte-wise VSELECT as the general fallback.
static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
                                   SDValue V2, ArrayRef<int> Original,
                                   const APInt &Zeroable,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  uint64_t BlendMask = 0;
  bool ForceV1Zero = false, ForceV2Zero = false;
  // Local copy: matchShuffleAsBlend canonicalizes the mask in place.
  SmallVector<int, 64> Mask(Original.begin(), Original.end());
  if (!matchShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
                           BlendMask))
    return SDValue();

  // The matcher may need an input replaced by zero to realize zeroable lanes.
  if (ForceV1Zero)
    V1 = getZeroVector(VT, Subtarget, DAG, DL);
  if (ForceV2Zero)
    V2 = getZeroVector(VT, Subtarget, DAG, DL);

  switch (VT.SimpleTy) {
  case MVT::v4i64:
  case MVT::v8i32:
    assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
    LLVM_FALLTHROUGH;
  case MVT::v4f64:
  case MVT::v8f32:
    assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
    LLVM_FALLTHROUGH;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
    assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
    // These types directly support an immediate blend.
    return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
                       DAG.getTargetConstant(BlendMask, DL, MVT::i8));
  case MVT::v16i16: {
    assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
    SmallVector<int, 8> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
      // PBLENDW repeats its 8-bit immediate across 128-bit lanes, so rebuild
      // the immediate from the per-lane repeated mask.
      assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
      BlendMask = 0;
      for (int i = 0; i < 8; ++i)
        if (RepeatedMask[i] >= 8)
          BlendMask |= 1ull << i;
      return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
                         DAG.getTargetConstant(BlendMask, DL, MVT::i8));
    }

    // If either half of the blend immediate is trivial (all-V1 or all-V2),
    // emit two lane blends and merge them with a lane shuffle; the shuffle
    // keeps lanes 0-7 of Lo and lanes 8-15 (indices 24-31) of Hi.
    uint64_t LoMask = BlendMask & 0xFF;
    uint64_t HiMask = (BlendMask >> 8) & 0xFF;
    if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
      SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
                               DAG.getTargetConstant(LoMask, DL, MVT::i8));
      SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
                               DAG.getTargetConstant(HiMask, DL, MVT::i8));
      return DAG.getVectorShuffle(
          MVT::v16i16, DL, Lo, Hi,
          {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
    }
    // Otherwise fall through to the byte-blend path below.
    LLVM_FALLTHROUGH;
  }
  case MVT::v32i8:
    assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
    LLVM_FALLTHROUGH;
  case MVT::v16i8: {
    assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");

    // Attempt to lower to a bitmask if we can — a plain AND-based mask is
    // cheaper than a variable byte blend.
    if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
                                               Subtarget, DAG))
      return Masked;

    // With BWI+VLX, use a k-register masked move instead of VPBLENDVB.
    if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
      MVT IntegerType =
          MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
      SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
      return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
    }

    // With VLX, try a bit blend (presumably lowered via VPTERNLOG — see
    // lowerShuffleAsBitBlend; confirm against its implementation).
    if (Subtarget.hasVLX())
      if (SDValue BitBlend =
              lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
        return BitBlend;

    // Number of bytes covered by each mask element.
    int Scale = VT.getScalarSizeInBits() / 8;

    // The fallback blend is always done on bytes; compute the byte vector
    // type of the same total width.
    MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);

    // Prefer a loaded operand in the second VSELECT position so the load can
    // stay folded; commute the mask and swap the inputs to arrange that.
    if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
      ShuffleVectorSDNode::commuteMask(Mask);
      std::swap(V1, V2);
    }

    // Build a byte-wise select condition: all-ones picks from V1, zero picks
    // from V2, undef mask entries stay undef. Each element is widened to
    // Scale bytes.
    SmallVector<SDValue, 32> VSELECTMask;
    for (int i = 0, Size = Mask.size(); i < Size; ++i)
      for (int j = 0; j < Scale; ++j)
        VSELECTMask.push_back(
            Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
                        : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
                                          MVT::i8));

    V1 = DAG.getBitcast(BlendVT, V1);
    V2 = DAG.getBitcast(BlendVT, V2);
    return DAG.getBitcast(
        VT,
        DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask),
                      V1, V2));
  }
  case MVT::v16f32:
  case MVT::v8f64:
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8: {
    // 512-bit types: try a bitmask first, but only when not optimizing for
    // size (the bitmask may need an extra constant load).
    bool OptForSize = DAG.shouldOptForSize();
    if (!OptForSize) {
      if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
                                                 Subtarget, DAG))
        return Masked;
    }

    // Otherwise materialize the blend mask as an integer constant and use a
    // k-register masked move (V2 merged over V1 under MaskNode).
    MVT IntegerType =
        MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
    SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
    return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
  }
  default:
    llvm_unreachable("Not a supported integer vector type!");
  }
}
12476 | |
12477 | |
12478 | |
12479 | |
12480 | |
12481 | |
/// Try to lower as a blend of elements from two inputs followed by
/// a single-input permutation.
///
/// Succeeds when every referenced source element can be blended into the
/// output lane it occupies modulo Size, with a final permute distributing
/// the blended lanes.
static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             SelectionDAG &DAG,
                                             bool ImmBlends = false) {
  // BlendMask[l] records which source element (0..2*Size) must occupy blend
  // lane l; PermuteMask then rearranges the blended vector.
  SmallVector<int, 32> BlendMask(Mask.size(), -1);
  SmallVector<int, 32> PermuteMask(Mask.size(), -1);

  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
    if (Mask[i] < 0)
      continue;

    assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds.");

    // Each blend lane may only be claimed by one distinct source element;
    // a conflicting second claim means the decomposition is impossible.
    if (BlendMask[Mask[i] % Size] < 0)
      BlendMask[Mask[i] % Size] = Mask[i];
    else if (BlendMask[Mask[i] % Size] != Mask[i])
      return SDValue();

    PermuteMask[i] = Mask[i] % Size;
  }

  // If only immediate blends are allowed, 8-bit elements must be widenable
  // to 16-bit (there is no byte-granular immediate blend).
  unsigned EltSize = VT.getScalarSizeInBits();
  if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask))
    return SDValue();

  SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
  return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
12515 | |
12516 | |
12517 | |
12518 | |
12519 | |
12520 | |
/// Try to lower as an unpack of elements from two inputs followed by
/// a single-input permutation.
///
/// Matches masks where, per 128-bit lane, even output elements and odd output
/// elements each come from a consistent source half (low or high), so a
/// single UNPCKL/UNPCKH plus a permute reproduces the shuffle.
static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             SelectionDAG &DAG) {
  int NumElts = Mask.size();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumLaneElts = NumElts / NumLanes;
  int NumHalfLaneElts = NumLaneElts / 2;

  bool MatchLo = true, MatchHi = true;
  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};

  // Determine the unpack operands: even-indexed elements feed operand 0,
  // odd-indexed elements feed operand 1, and each operand must be a single
  // consistent source.
  for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
    for (int Elt = 0; Elt != NumLaneElts; ++Elt) {
      int M = Mask[Lane + Elt];
      if (M < 0)
        continue;

      SDValue &Op = Ops[Elt & 1];
      if (M < NumElts && (Op.isUndef() || Op == V1))
        Op = V1;
      else if (NumElts <= M && (Op.isUndef() || Op == V2))
        Op = V2;
      else
        return SDValue();

      // Track whether all references fit the low-half (UNPCKL) or high-half
      // (UNPCKH) pattern of each lane.
      int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts;
      MatchLo &= isUndefOrInRange(M, Lo, Mid) ||
                 isUndefOrInRange(M, NumElts + Lo, NumElts + Mid);
      MatchHi &= isUndefOrInRange(M, Mid, Hi) ||
                 isUndefOrInRange(M, NumElts + Mid, NumElts + Hi);
      if (!MatchLo && !MatchHi)
        return SDValue();
    }
  }
  assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI");

  // Build the permute that rearranges the unpack result. In the unpack,
  // source element k of an operand lands at lane position 2*k (operand 0)
  // or 2*k+1 (operand 1) within its lane.
  SmallVector<int, 32> PermuteMask(NumElts, -1);
  for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
    for (int Elt = 0; Elt != NumLaneElts; Elt += 2) {
      int M0 = Mask[Lane + Elt + 0];
      int M1 = Mask[Lane + Elt + 1];
      // Paired even/odd outputs must reference the same half-lane offset so
      // both can be taken from one unpack pair.
      if (0 <= M0 && 0 <= M1 &&
          (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts))
        return SDValue();
      if (0 <= M0)
        PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts));
      if (0 <= M1)
        PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1;
    }
  }

  unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
  SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);
  return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask);
}
12581 | |
12582 | |
12583 | |
/// Try to lower a shuffle as a byte rotation (PALIGNR) of the two inputs
/// followed by a single-input per-lane permutation.
static SDValue lowerShuffleAsByteRotateAndPermute(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  // PALIGNR-based lowering needs SSSE3 (128-bit), AVX2 (256-bit) or
  // BWI (512-bit).
  if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
      (VT.is256BitVector() && !Subtarget.hasAVX2()) ||
      (VT.is512BitVector() && !Subtarget.hasBWI()))
    return SDValue();

  // PALIGNR works within 128-bit lanes only.
  if (is128BitLaneCrossingShuffleMask(VT, Mask))
    return SDValue();

  int Scale = VT.getScalarSizeInBits() / 8;
  int NumLanes = VT.getSizeInBits() / 128;
  int NumElts = VT.getVectorNumElements();
  int NumEltsPerLane = NumElts / NumLanes;

  // Compute, per source, the in-lane offset range its referenced elements
  // span, and whether each source's references are already a pure blend.
  bool Blend1 = true;
  bool Blend2 = true;
  std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
  std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
  for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
    for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
      int M = Mask[Lane + Elt];
      if (M < 0)
        continue;
      if (M < NumElts) {
        Blend1 &= (M == (Lane + Elt));
        assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
        M = M % NumEltsPerLane;
        Range1.first = std::min(Range1.first, M);
        Range1.second = std::max(Range1.second, M);
      } else {
        M -= NumElts;
        Blend2 &= (M == (Lane + Elt));
        assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
        M = M % NumEltsPerLane;
        Range2.first = std::min(Range2.first, M);
        Range2.second = std::max(Range2.second, M);
      }
    }
  }

  // Bail if either source contributed nothing (its range stayed empty) —
  // there is nothing to rotate against.
  if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
      !(0 <= Range2.first && Range2.second < NumEltsPerLane))
    return SDValue();

  // For wide vectors, if one side is already a blend, other lowerings are
  // expected to be cheaper than rotate+permute.
  if (VT.getSizeInBits() > 128 && (Blend1 || Blend2))
    return SDValue();

  // Rotate Hi:Lo by RotAmt elements (scaled to bytes for PALIGNR), then
  // build the permute that places each mask element; Ofs distinguishes
  // which source was treated as Lo (0) vs Hi (NumElts).
  auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) {
    MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
    SDValue Rotate = DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
                        DAG.getBitcast(ByteVT, Lo),
                        DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8)));
    SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
    for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
      for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
        int M = Mask[Lane + Elt];
        if (M < 0)
          continue;
        if (M < NumElts)
          PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
        else
          PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
      }
    }
    return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask);
  };

  // The two ranges must be disjoint so a single rotation can concatenate
  // both sources' referenced spans without overlap.
  if (Range2.second < Range1.first)
    return RotateAndPermute(V1, V2, Range1.first, 0);
  if (Range1.second < Range2.first)
    return RotateAndPermute(V2, V1, Range2.first, NumElts);
  return SDValue();
}
12667 | |
12668 | |
12669 | |
12670 | |
12671 | |
12672 | |
12673 | |
12674 | |
/// Generic routine to decompose a shuffle and blend into independent
/// per-input shuffles followed by a blend/merge.
///
/// This fallback handles any two-input mask: each input is first permuted
/// into the output positions it supplies, then the two are merged with a
/// simple (blend-like) final shuffle.
static SDValue lowerShuffleAsDecomposedShuffleMerge(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  int NumElts = Mask.size();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumEltsPerLane = NumElts / NumLanes;

  // Split the mask into a per-input permute plus a final merge mask. Also
  // note whether V1 feeds only even outputs and V2 only odd outputs
  // (an "alternating" pattern that can be merged via unpack).
  bool IsAlternating = true;
  SmallVector<int, 32> V1Mask(NumElts, -1);
  SmallVector<int, 32> V2Mask(NumElts, -1);
  SmallVector<int, 32> FinalMask(NumElts, -1);
  for (int i = 0; i < NumElts; ++i) {
    int M = Mask[i];
    if (M >= 0 && M < NumElts) {
      V1Mask[i] = M;
      FinalMask[i] = i;
      IsAlternating &= (i & 1) == 0;
    } else if (M >= NumElts) {
      V2Mask[i] = M - NumElts;
      FinalMask[i] = i + NumElts;
      IsAlternating &= (i & 1) == 1;
    }
  }

  // If both inputs genuinely need permuting, first try cheaper structured
  // decompositions that use only one single-input permute.
  if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
    // Immediate-blend + permute.
    if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
                                                          DAG, true))
      return BlendPerm;
    if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask,
                                                           DAG))
      return UnpackPerm;
    if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute(
            DL, VT, V1, V2, Mask, Subtarget, DAG))
      return RotatePerm;
    // Finally, any blend + permute (including variable blends).
    if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
                                                          DAG))
      return BlendPerm;
  }

  // For small-element alternating patterns, pack each input's elements into
  // the low half of each lane position-pair so the final merge becomes an
  // interleave of half-filled lanes.
  if (IsAlternating && VT.getScalarSizeInBits() < 32) {
    V1Mask.assign(NumElts, -1);
    V2Mask.assign(NumElts, -1);
    FinalMask.assign(NumElts, -1);
    for (int i = 0; i != NumElts; i += NumEltsPerLane)
      for (int j = 0; j != NumEltsPerLane; ++j) {
        int M = Mask[i + j];
        if (M >= 0 && M < NumElts) {
          V1Mask[i + (j / 2)] = M;
          FinalMask[i + j] = i + (j / 2);
        } else if (M >= NumElts) {
          V2Mask[i + (j / 2)] = M - NumElts;
          FinalMask[i + j] = i + (j / 2) + NumElts;
        }
      }
  }

  V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
  V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
  return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask);
}
12748 | |
12749 | |
12750 | |
12751 | |
12752 | |
12753 | static int matchShuffleAsBitRotate(ArrayRef<int> Mask, int NumSubElts) { |
12754 | int NumElts = Mask.size(); |
12755 | assert((NumElts % NumSubElts) == 0 && "Illegal shuffle mask"); |
12756 | |
12757 | int RotateAmt = -1; |
12758 | for (int i = 0; i != NumElts; i += NumSubElts) { |
12759 | for (int j = 0; j != NumSubElts; ++j) { |
12760 | int M = Mask[i + j]; |
12761 | if (M < 0) |
12762 | continue; |
12763 | if (!isInRange(M, i, i + NumSubElts)) |
12764 | return -1; |
12765 | int Offset = (NumSubElts - (M - (i + j))) % NumSubElts; |
12766 | if (0 <= RotateAmt && Offset != RotateAmt) |
12767 | return -1; |
12768 | RotateAmt = Offset; |
12769 | } |
12770 | } |
12771 | return RotateAmt; |
12772 | } |
12773 | |
/// Search for a group size at which the mask is a uniform sub-group rotation,
/// and compute the rotate vector type. Returns the rotation amount in bits,
/// or -1 if no legal rotation was matched.
static int matchShuffleAsBitRotate(MVT &RotateVT, int EltSizeInBits,
                                   const X86Subtarget &Subtarget,
                                   ArrayRef<int> Mask) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
  assert(EltSizeInBits < 64 && "Can't rotate 64-bit integers");

  // AVX512 rotates operate on at-least-32-bit lanes, so the smallest group
  // must span 32 bits there; otherwise pairs of elements suffice. Groups
  // may be at most 64 bits wide.
  int MinSubElts = Subtarget.hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2;
  int MaxSubElts = 64 / EltSizeInBits;
  for (int NumSubElts = MinSubElts; NumSubElts <= MaxSubElts; NumSubElts *= 2) {
    int RotateAmt = matchShuffleAsBitRotate(Mask, NumSubElts);
    if (RotateAmt < 0)
      continue;

    // Widen the vector type so each group becomes one scalar lane.
    int NumElts = Mask.size();
    MVT RotateSVT = MVT::getIntegerVT(EltSizeInBits * NumSubElts);
    RotateVT = MVT::getVectorVT(RotateSVT, NumElts / NumSubElts);
    return RotateAmt * EltSizeInBits;
  }

  return -1;
}
12796 | |
12797 | |
/// Lower a single-input shuffle as a bit rotation, either with a native
/// rotate instruction (XOP/AVX512) or as a shift/shift/or sequence.
static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1,
                                       ArrayRef<int> Mask,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  // Only 128-bit XOP and AVX512 targets have vector rotate instructions.
  // Without them, bail once the target has SSE3 so other shuffle lowerings
  // get first shot. NOTE(review): this checks hasSSE3(); confirm whether
  // SSSE3 (PSHUFB availability) was intended.
  bool IsLegal =
      (VT.is128BitVector() && Subtarget.hasXOP()) || Subtarget.hasAVX512();
  if (!IsLegal && Subtarget.hasSSE3())
    return SDValue();

  MVT RotateVT;
  int RotateAmt = matchShuffleAsBitRotate(RotateVT, VT.getScalarSizeInBits(),
                                          Subtarget, Mask);
  if (RotateAmt < 0)
    return SDValue();

  // No native rotate: emulate with SHL + SRL + OR on the widened type.
  if (!IsLegal) {
    // Multiples of 16 bits are better served by the word-shuffle lowerings
    // that run later, so don't claim them here.
    if ((RotateAmt % 16) == 0)
      return SDValue();

    unsigned ShlAmt = RotateAmt;
    unsigned SrlAmt = RotateVT.getScalarSizeInBits() - RotateAmt;
    V1 = DAG.getBitcast(RotateVT, V1);
    SDValue SHL = DAG.getNode(X86ISD::VSHLI, DL, RotateVT, V1,
                              DAG.getTargetConstant(ShlAmt, DL, MVT::i8));
    SDValue SRL = DAG.getNode(X86ISD::VSRLI, DL, RotateVT, V1,
                              DAG.getTargetConstant(SrlAmt, DL, MVT::i8));
    SDValue Rot = DAG.getNode(ISD::OR, DL, RotateVT, SHL, SRL);
    return DAG.getBitcast(VT, Rot);
  }

  // Native rotate-left-immediate on the widened type.
  SDValue Rot =
      DAG.getNode(X86ISD::VROTLI, DL, RotateVT, DAG.getBitcast(RotateVT, V1),
                  DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
  return DAG.getBitcast(VT, Rot);
}
12838 | |
12839 | |
12840 | |
12841 | |
/// Try to match a vector shuffle as an element rotation of the concatenation
/// of two vectors.
///
/// On success returns the rotation amount (in elements) and updates V1/V2 to
/// the low/high halves of the rotated pair. Returns -1 on failure.
static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2,
                                       ArrayRef<int> Mask) {
  int NumElts = Mask.size();

  // A rotation maps output i to source (i + NumElts - Rotation) % NumElts of
  // some fixed Lo/Hi pair; e.g. for 8 elements a rotation by 3 gives
  // [11, 12, 13, 14, 15, 0, 1, 2] (undef entries may appear anywhere).
  int Rotation = 0;
  SDValue Lo, Hi;
  for (int i = 0; i < NumElts; ++i) {
    int M = Mask[i];
    assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Where would element 0 of this element's source have to sit for the run
    // to be sequential? Zero means "no rotation", which we don't handle.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      // This element is in place, so no rotation can be matched here.
      return -1;

    // A negative StartIdx means this source supplies the high (wrapped) part;
    // a positive one means it supplies the low part. Either way it implies a
    // single rotation amount.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      // All elements must agree on one rotation amount.
      return -1;

    // Which actual input does this element read from?
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Assign that input to the Hi (wrapped) or Lo slot of the rotated pair,
    // depending on which side of the rotation it feeds.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Each slot must be fed by exactly one input.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      // Conflicting inputs for the same slot — no single rotation exists.
      return -1;
  }

  // At least one element was non-undef (masks with all-undef don't reach
  // here), so a rotation and at least one input must have been found.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  // If only one side was observed, reuse it for the other (its elements in
  // that half are all undef).
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation;
}
12910 | |
12911 | |
12912 | |
12913 | |
12914 | |
12915 | |
12916 | |
12917 | |
12918 | |
12919 | |
12920 | |
12921 | |
12922 | |
12923 | |
12924 | |
12925 | |
12926 | |
12927 | static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, |
12928 | ArrayRef<int> Mask) { |
12929 | |
12930 | if (isAnyZero(Mask)) |
12931 | return -1; |
12932 | |
12933 | |
12934 | SmallVector<int, 16> RepeatedMask; |
12935 | if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) |
12936 | return -1; |
12937 | |
12938 | int Rotation = matchShuffleAsElementRotate(V1, V2, RepeatedMask); |
12939 | if (Rotation <= 0) |
12940 | return -1; |
12941 | |
12942 | |
12943 | |
12944 | int NumElts = RepeatedMask.size(); |
12945 | int Scale = 16 / NumElts; |
12946 | return Rotation * Scale; |
12947 | } |
12948 | |
/// Lower a shuffle as a byte rotation: PALIGNR on SSSE3+, or a
/// shift/shift/or sequence as the SSE2 fallback for v16i8.
static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
                                        SDValue V2, ArrayRef<int> Mask,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");

  // The matcher may reorder the inputs into Lo/Hi form.
  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  // Byte rotations always operate on i8 vectors of the same total width.
  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  // SSSE3 targets can use PALIGNR directly (Hi:Lo concatenated, shifted
  // right by the rotation amount).
  if (Subtarget.hasSSSE3()) {
    assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
           "512-bit PALIGNR requires BWI instructions");
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
                        DAG.getTargetConstant(ByteRotation, DL, MVT::i8)));
  }

  assert(VT.is128BitVector() &&
         "Rotate-based lowering only supports 128-bit lowering!");
  assert(Mask.size() <= 16 &&
         "Can shuffle at most 16 bytes in a 128-bit vector!");
  assert(ByteVT == MVT::v16i8 &&
         "SSE2 rotate lowering only needed for v16i8!");

  // SSE2 fallback: emulate the rotation as (Lo << LoByteShift) | (Hi >>
  // HiByteShift) using whole-register byte shifts.
  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;

  SDValue LoShift =
      DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
                  DAG.getTargetConstant(LoByteShift, DL, MVT::i8));
  SDValue HiShift =
      DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
                  DAG.getTargetConstant(HiByteShift, DL, MVT::i8));
  return DAG.getBitcast(VT,
                        DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
12995 | |
12996 | |
12997 | |
12998 | |
12999 | |
13000 | |
13001 | |
13002 | |
13003 | |
13004 | |
13005 | |
13006 | static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1, |
13007 | SDValue V2, ArrayRef<int> Mask, |
13008 | const X86Subtarget &Subtarget, |
13009 | SelectionDAG &DAG) { |
13010 | assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && |
13011 | "Only 32-bit and 64-bit elements are supported!"); |
13012 | |
13013 | |
13014 | assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) |
13015 | && "VLX required for 128/256-bit vectors"); |
13016 | |
13017 | SDValue Lo = V1, Hi = V2; |
13018 | int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask); |
13019 | if (Rotation <= 0) |
13020 | return SDValue(); |
13021 | |
13022 | return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, |
13023 | DAG.getTargetConstant(Rotation, DL, MVT::i8)); |
13024 | } |
13025 | |
13026 | |
/// Try to lower a vector shuffle as a byte shift sequence.
///
/// Matches masks that are a contiguous run of sequential elements from one
/// source surrounded by zeroable lanes, and realizes them with one or more
/// whole-register byte shifts (VSHLDQ/VSRLDQ shift zeros in).
static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,
                                           SDValue V2, ArrayRef<int> Mask,
                                           const APInt &Zeroable,
                                           const X86Subtarget &Subtarget,
                                           SelectionDAG &DAG) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
  assert(VT.is128BitVector() && "Only 128-bit vectors supported");

  // Count the runs of zeroable elements at each end of the vector.
  unsigned ZeroLo = Zeroable.countTrailingOnes();
  unsigned ZeroHi = Zeroable.countLeadingOnes();
  if (!ZeroLo && !ZeroHi)
    return SDValue();

  // The middle must be one sequential run of elements.
  unsigned NumElts = Mask.size();
  unsigned Len = NumElts - (ZeroLo + ZeroHi);
  if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo]))
    return SDValue();

  // ...and entirely from a single source.
  unsigned Scale = VT.getScalarSizeInBits() / 8;
  ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len);
  if (!isUndefOrInRange(StubMask, 0, NumElts) &&
      !isUndefOrInRange(StubMask, NumElts, 2 * NumElts))
    return SDValue();

  SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
  Res = DAG.getBitcast(MVT::v16i8, Res);

  if (ZeroLo == 0) {
    // Zeros only at the top: shift the run to the end, then shift right to
    // clear the high lanes (shifting zeros back in).
    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8));
  } else if (ZeroHi == 0) {
    // Zeros only at the bottom: shift the run down to position 0, then shift
    // left to reintroduce the low zeros.
    unsigned Shift = Mask[ZeroLo] % NumElts;
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
  } else if (!Subtarget.hasSSSE3()) {
    // Zeros at both ends. Without SSSE3 (no PSHUFB alternative), use three
    // shifts: push the run to the top, clear both sides coming back down,
    // then place it at its final offset.
    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Shift += Mask[ZeroLo] % NumElts;
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
  } else
    return SDValue();

  return DAG.getBitcast(VT, Res);
}
13090 | |
13091 | |
13092 | |
13093 | |
13094 | |
13095 | |
13096 | |
13097 | |
13098 | |
13099 | |
13100 | |
13101 | |
13102 | |
13103 | |
13104 | |
13105 | |
13106 | |
13107 | |
13108 | |
13109 | |
13110 | |
13111 | |
13112 | |
13113 | |
/// Try to match a vector shuffle as an element/byte shift that shifts in
/// zeros.
///
/// Searches over widened element scales for a shift of one source whose
/// vacated positions are all zeroable. On success sets \p ShiftVT to the
/// vector type to shift and \p Opcode to the VSHLI/VSRLI/VSHLDQ/VSRLDQ
/// opcode, and returns the shift amount; returns -1 on failure.
/// \p MaskOffset selects which source to match (0 for V1, Size for V2).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable,
                               const X86Subtarget &Subtarget) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // Within each Scale-wide group, the Shift positions vacated by the shift
  // (at the start for left shifts, the end for right shifts) must be
  // zeroable.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  // Check the mask really is the requested shift, then compute the opcode,
  // amount, and the (possibly widened) type to perform it on.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset))
        return -1;
    }

    // Groups wider than 64 bits have no element-shift instruction; use the
    // whole-register byte shifts instead.
    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
                  : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Byte shifts operate on 64-bit-halved groups of the scale.
    Scale = ByteShift ? Scale / 2 : Scale;

    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  // Try progressively wider group scales up to 128 bits (or 64 bits for
  // 512-bit vectors without BWI, where byte shifts aren't available).
  unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // No viable shift found.
  return -1;
}
13175 | |
/// Lower a shuffle as a zero-filling element/byte shift of one of the inputs,
/// trying V1 first and then V2.
static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
                                   SDValue V2, ArrayRef<int> Mask,
                                   const APInt &Zeroable,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match a shift of V1 (mask offset 0).
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable, Subtarget);

  // Otherwise try a shift of V2 (mask offset Size).
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable, Subtarget);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  // Perform the shift on the matched (possibly widened) type and cast back.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
  return DAG.getBitcast(VT, V);
}
13209 | |
13210 | |
13211 | |
/// Try to match the shuffle as an SSE4A EXTRQ bit-field extraction.
///
/// On success sets \p BitLen / \p BitIdx (bit length and starting bit index,
/// both masked to 6 bits) and replaces V1 with the source being extracted.
static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
                                ArrayRef<int> Mask, uint64_t &BitLen,
                                uint64_t &BitIdx, const APInt &Zeroable) {
  int Size = Mask.size();
  int HalfSize = Size / 2;
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
  assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");

  // EXTRQ only writes the lower 64 bits; the upper half must be undef.
  if (!isUndefUpperHalf(Mask))
    return false;

  // Determine the extraction length: the lowest run that ends at the last
  // non-zeroable element.
  int Len = HalfSize;
  for (; Len > 0; --Len)
    if (!Zeroable[Len - 1])
      break;
  assert(Len > 0 && "Zeroable shuffle mask");

  // All extracted elements must be a consecutive run from one source's low
  // half, at a non-negative offset Idx.
  SDValue Src;
  int Idx = -1;
  for (int i = 0; i != Len; ++i) {
    int M = Mask[i];
    if (M == SM_SentinelUndef)
      continue;
    SDValue &V = (M < Size ? V1 : V2);
    M = M % Size;

    // The extracted elements must start at a valid index and all be from the
    // same input's low half.
    if (i > M || M >= HalfSize)
      return false;

    if (Idx < 0 || (Src == V && Idx == (M - i))) {
      Src = V;
      Idx = M - i;
      continue;
    }
    return false;
  }

  if (!Src || Idx < 0)
    return false;

  // Convert element length/index to a bit field; EXTRQ immediates are 6-bit.
  assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
  BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
  BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
  V1 = Src;
  return true;
}
13264 | |
13265 | |
13266 | |
13267 | |
/// Try to match the shuffle as an SSE4A INSERTQ: insert a contiguous run of
/// elements from one source into a base vector at index Idx.
///
/// On success sets \p BitLen / \p BitIdx (6-bit bit length and index) and
/// rewrites V1 to the base and V2 to the inserted source.
static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
                                  ArrayRef<int> Mask, uint64_t &BitLen,
                                  uint64_t &BitIdx) {
  int Size = Mask.size();
  int HalfSize = Size / 2;
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  // INSERTQ only writes the lower 64 bits; the upper half must be undef.
  if (!isUndefUpperHalf(Mask))
    return false;

  for (int Idx = 0; Idx != HalfSize; ++Idx) {
    SDValue Base;

    // Elements below the insertion point must be an identity run from a
    // single base (or undef, in which case any base works).
    if (isUndefInRange(Mask, 0, Idx)) {
      // No base constraint from the prefix.
    } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
      Base = V1;
    } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
      Base = V2;
    } else {
      continue;
    }

    // Try every possible insertion length [Idx, Hi).
    for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
      SDValue Insert;
      int Len = Hi - Idx;

      // The inserted run must come from the low elements of one source.
      if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
        Insert = V1;
      } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
        Insert = V2;
      } else {
        continue;
      }

      // Elements above the insertion must continue the identity pattern of
      // the same base (or be undef).
      if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
        // No base constraint from the suffix.
      } else if ((!Base || (Base == V1)) &&
                 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
        Base = V1;
      } else if ((!Base || (Base == V2)) &&
                 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
                                            Size + Hi)) {
        Base = V2;
      } else {
        continue;
      }

      // Convert element length/index to 6-bit bit-field immediates.
      BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
      BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
      V1 = Base;
      V2 = Insert;
      return true;
    }
  }

  return false;
}
13332 | |
13333 | |
13334 | static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, |
13335 | SDValue V2, ArrayRef<int> Mask, |
13336 | const APInt &Zeroable, SelectionDAG &DAG) { |
13337 | uint64_t BitLen, BitIdx; |
13338 | if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) |
13339 | return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, |
13340 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
13341 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
13342 | |
13343 | if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) |
13344 | return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), |
13345 | V2 ? V2 : DAG.getUNDEF(VT), |
13346 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
13347 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
13348 | |
13349 | return SDValue(); |
13350 | } |
13351 | |
13352 | |
13353 | |
13354 | |
13355 | |
13356 | |
13357 | |
13358 | |
13359 | |
13360 | |
/// Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extended elements are consecutive and begin
/// at element index \p Offset in the input; the offset must either lie in the
/// first 128-bit lane or start an upper lane (asserted below).
static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
    const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
    ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(Scale > 1 && "Need a scale to extend.");
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();
  int NumEltsPerLane = 128 / EltBits;
  int OffsetLane = Offset / NumEltsPerLane;
  assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
         "Only 8, 16, and 32 bit elements can be extended.");
  assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
  assert(0 <= Offset && "Extension offset must be positive.");
  assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
         "Extension offset must be in the first lane or start an upper lane.");

  // Check that an index is in the same lane as the base offset.
  auto SafeOffset = [&](int Idx) {
    return OffsetLane == (Idx / NumEltsPerLane);
  };

  // Shift the input along so that the offset base moves down to element 0,
  // leaving out-of-lane elements undef.
  auto ShuffleOffset = [&](SDValue V) {
    if (!Offset)
      return V;

    SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
    for (int i = 0; i * Scale < NumElements; ++i) {
      int SrcIdx = i + Offset;
      ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
    }
    return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
  };

  // SSE4.1 and later have native *EXTEND_VECTOR_INREG nodes - prefer those.
  if (Subtarget.hasSSE41()) {
    // Not worth offsetting 128-bit vectors if scale == 2; an unpack-based
    // lowering elsewhere can handle that pattern directly.
    if (Offset && Scale == 2 && VT.is128BitVector())
      return SDValue();
    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
                                 NumElements / Scale);
    InputV = ShuffleOffset(InputV);
    InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND,
                                    DL, ExtVT, InputV, DAG);
    return DAG.getBitcast(VT, InputV);
  }

  assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");

  // For 32-bit any-extends a single PSHUFD suffices: the extended bits are
  // undef so we only need to place the source dwords.
  if (AnyExt && EltBits == 32) {
    int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
                         -1};
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                        DAG.getBitcast(MVT::v4i32, InputV),
                        getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
  }
  // 16-bit any-extends by more than 2x: PSHUFD to place the dwords, then a
  // PSHUFLW/PSHUFHW (chosen by offset parity) to place the words.
  if (AnyExt && EltBits == 16 && Scale > 2) {
    int PSHUFDMask[4] = {Offset / 2, -1,
                         SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
    InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                         DAG.getBitcast(MVT::v4i32, InputV),
                         getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
    int PSHUFWMask[4] = {1, -1, -1, -1};
    unsigned OddEvenOp = (Offset & 1) ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
    return DAG.getBitcast(
        VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
                        DAG.getBitcast(MVT::v8i16, InputV),
                        getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
  }

  // The SSE4A EXTRQ instruction can zero-extend narrow elements up to
  // 64 bits at a time.
  if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
    assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
    assert(VT.is128BitVector() && "Unexpected vector width!");

    int LoIdx = Offset * EltBits;
    SDValue Lo = DAG.getBitcast(
        MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
                                DAG.getTargetConstant(EltBits, DL, MVT::i8),
                                DAG.getTargetConstant(LoIdx, DL, MVT::i8)));

    // If the upper half is undef (or the next element leaves the lane), the
    // low extraction alone is enough.
    if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
      return DAG.getBitcast(VT, Lo);

    int HiIdx = (Offset + 1) * EltBits;
    SDValue Hi = DAG.getBitcast(
        MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
                                DAG.getTargetConstant(EltBits, DL, MVT::i8),
                                DAG.getTargetConstant(HiIdx, DL, MVT::i8)));
    return DAG.getBitcast(VT,
                          DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
  }

  // If expanding would need more than 2 unpack steps, use PSHUFB when
  // available: zero-extend i8 elements in a single byte shuffle, using 0x80
  // mask bytes to force zeros (undef bytes for an any-extend).
  if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
    assert(NumElements == 16 && "Unexpected byte vector width!");
    SDValue PSHUFBMask[16];
    for (int i = 0; i < 16; ++i) {
      int Idx = Offset + (i / Scale);
      if ((i % Scale == 0 && SafeOffset(Idx))) {
        PSHUFBMask[i] = DAG.getConstant(Idx, DL, MVT::i8);
        continue;
      }
      PSHUFBMask[i] =
          AnyExt ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(0x80, DL, MVT::i8);
    }
    InputV = DAG.getBitcast(MVT::v16i8, InputV);
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
                        DAG.getBuildVector(MVT::v16i8, DL, PSHUFBMask)));
  }

  // If we are extending from an offset, ensure we start on a boundary that
  // we can unpack from.
  int AlignToUnpack = Offset % (NumElements / Scale);
  if (AlignToUnpack) {
    SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
    for (int i = AlignToUnpack; i < NumElements; ++i)
      ShMask[i - AlignToUnpack] = i;
    InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
    Offset -= AlignToUnpack;
  }

  // Otherwise emit a sequence of unpacks, doubling the element width (and
  // halving the scale) each step until fully extended.
  do {
    unsigned UnpackLoHi = X86ISD::UNPCKL;
    if (Offset >= (NumElements / 2)) {
      UnpackLoHi = X86ISD::UNPCKH;
      Offset -= (NumElements / 2);
    }

    MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
    // Interleave with undef (any-extend) or zeros (zero-extend).
    SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
                         : getZeroVector(InputVT, Subtarget, DAG, DL);
    InputV = DAG.getBitcast(InputVT, InputV);
    InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
    Scale /= 2;
    EltBits *= 2;
    NumElements /= 2;
  } while (Scale > 1);
  return DAG.getBitcast(VT, InputV);
}
13510 | |
13511 | |
13512 | |
13513 | |
13514 | |
13515 | |
13516 | |
13517 | |
13518 | |
13519 | |
13520 | |
13521 | |
13522 | |
/// Try to lower a vector shuffle as a zero extension on any microarch.
///
/// Aggressively matches shuffles that look like a zero/any extension of the
/// low elements of one input: base elements must be consecutive from a single
/// source, and the in-between elements must all be zeroable (zero-extend) or
/// undef (any-extend). Each candidate scale is tried from widest to narrowest
/// and handed to lowerShuffleAsSpecificZeroOrAnyExtend.
static SDValue lowerShuffleAsZeroOrAnyExtend(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const APInt &Zeroable, const X86Subtarget &Subtarget,
    SelectionDAG &DAG) {
  int Bits = VT.getSizeInBits();
  int NumLanes = Bits / 128;
  int NumElements = VT.getVectorNumElements();
  int NumEltsPerLane = NumElements / NumLanes;
  assert(VT.getScalarSizeInBits() <= 32 &&
         "Exceeds 32-bit integer zero extension limit");
  assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");

  // Check a particular ext-scale and lower to it if the mask matches.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    bool AnyExt = true;
    int Offset = 0;
    int Matches = 0;
    for (int i = 0; i < NumElements; ++i) {
      int M = Mask[i];
      if (M < 0)
        continue; // Undef is valid anywhere but tells us nothing.
      if (i % Scale != 0) {
        // Each of the extended (non-base) positions must be zeroable.
        if (!Zeroable[i])
          return SDValue();

        // An explicitly zeroed position means this is no longer an any-ext.
        AnyExt = false;
        continue;
      }

      // Each of the base elements must be consecutive indices into the same
      // input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);
      } else if (InputV != V)
        return SDValue(); // Flip-flopping inputs.

      // Offset must start in the lowest 128-bit lane or at the start of an
      // upper lane.
      // FIXME: Is it ever worth allowing a negative base offset?
      if (!((0 <= Offset && Offset < NumEltsPerLane) ||
            (Offset % NumEltsPerLane) == 0))
        return SDValue();

      // If we are offsetting, all referenced entries must come from the same
      // lane.
      if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
        return SDValue();

      if ((M % NumElements) != (Offset + (i / Scale)))
        return SDValue(); // Non-consecutive strided elements.
      Matches++;
    }

    // If we fail to find an input, we have a zero-shuffle which should always
    // have already been handled.
    if (!InputV)
      return SDValue();

    // If we are offsetting, don't bother extending when we only matched a
    // single input element - a basic shuffle/unpack will do better.
    if (Offset != 0 && Matches < 2)
      return SDValue();

    return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,
                                                 InputV, Mask, Subtarget, DAG);
  };

  // The widest scale possible for extending is to a 64-bit integer.
  assert(Bits % 64 == 0 &&
         "The number of bits in a vector must be divisible by 64 on x86!");
  int NumExtElements = Bits / 64;

  // Each iteration, try extending the elements half as much, but into twice as
  // many elements.
  for (; NumExtElements < NumElements; NumExtElements *= 2) {
    assert(NumElements % NumExtElements == 0 &&
           "The input vector size must be divisible by the extended size.");
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }

  // General extends failed, but only 128-bit vectors can use MOVQ below.
  if (Bits != 128)
    return SDValue();

  // Returns one of the source operands if the shuffle can be reduced to
  // copying the lower 64-bits and zeroing the upper 64-bits (VZEXT_MOVL).
  auto CanZExtLowHalf = [&]() {
    for (int i = NumElements / 2; i != NumElements; ++i)
      if (!Zeroable[i])
        return SDValue();
    if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
      return V1;
    if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
      return V2;
    return SDValue();
  };

  if (SDValue V = CanZExtLowHalf()) {
    V = DAG.getBitcast(MVT::v2i64, V);
    V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
    return DAG.getBitcast(VT, V);
  }

  // No viable ext lowering found.
  return SDValue();
}
13638 | |
13639 | |
13640 | |
13641 | |
13642 | static SDValue getScalarValueForVectorElement(SDValue V, int Idx, |
13643 | SelectionDAG &DAG) { |
13644 | MVT VT = V.getSimpleValueType(); |
13645 | MVT EltVT = VT.getVectorElementType(); |
13646 | V = peekThroughBitcasts(V); |
13647 | |
13648 | |
13649 | |
13650 | MVT NewVT = V.getSimpleValueType(); |
13651 | if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) |
13652 | return SDValue(); |
13653 | |
13654 | if (V.getOpcode() == ISD::BUILD_VECTOR || |
13655 | (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) { |
13656 | |
13657 | |
13658 | SDValue S = V.getOperand(Idx); |
13659 | if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits()) |
13660 | return DAG.getBitcast(EltVT, S); |
13661 | } |
13662 | |
13663 | return SDValue(); |
13664 | } |
13665 | |
13666 | |
13667 | |
13668 | |
13669 | |
13670 | static bool isShuffleFoldableLoad(SDValue V) { |
13671 | V = peekThroughBitcasts(V); |
13672 | return ISD::isNON_EXTLoad(V.getNode()); |
13673 | } |
13674 | |
13675 | |
13676 | |
13677 | |
13678 | |
/// Try to lower insertion of a single element from V2 into an otherwise
/// zeroable (or in the FP case, unchanged) V1.
///
/// This is a common pattern with especially efficient lowerings across many
/// subtarget feature sets (VZEXT_MOVL, MOVSS/MOVSD/MOVSH, shifts).
static SDValue lowerShuffleAsElementInsertion(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const APInt &Zeroable, const X86Subtarget &Subtarget,
    SelectionDAG &DAG) {
  MVT ExtVT = VT;
  MVT EltVT = VT.getVectorElementType();

  // Find the single mask element taken from V2, and check whether every
  // other position is zeroable.
  int V2Index =
      find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
      Mask.begin();
  bool IsV1Zeroable = true;
  for (int i = 0, Size = Mask.size(); i < Size; ++i)
    if (i != V2Index && !Zeroable[i]) {
      IsV1Zeroable = false;
      break;
    }

  // Check for a single input coming from a scalar (SCALAR_TO_VECTOR /
  // BUILD_VECTOR operand) that we can insert directly.
  SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
                                               DAG);
  if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
    V2S = DAG.getBitcast(EltVT, V2S);
    if (EltVT == MVT::i8 || (EltVT == MVT::i16 && !Subtarget.hasFP16())) {
      // Using zext to widen a narrow element only works when the rest of the
      // result is zero.
      if (!IsV1Zeroable)
        return SDValue();

      // Zero-extend the scalar directly to i32 and widen the vector type to
      // match.
      ExtVT = MVT::getVectorVT(MVT::i32, ExtVT.getSizeInBits() / 32);
      V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
    }
    V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
  } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
             EltVT == MVT::i16) {
    // Either we're not inserting from the low element of the input, or the
    // element size is too small to clear the high bits via VZEXT_MOVL.
    return SDValue();
  }

  if (!IsV1Zeroable) {
    // If V1 can't be treated as a zero vector we have fewer options: only
    // 128-bit floating point insertion into the low element via MOVS*.
    assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
    if (!VT.isFloatingPoint() || V2Index != 0)
      return SDValue();
    // All other V1 elements must stay in place.
    SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
    V1Mask[V2Index] = -1;
    if (!isNoopShuffleMask(V1Mask))
      return SDValue();
    if (!VT.is128BitVector())
      return SDValue();

    // Otherwise, use MOVSH/MOVSS/MOVSD depending on the element type.
    unsigned MovOpc = 0;
    if (EltVT == MVT::f16)
      MovOpc = X86ISD::MOVSH;
    else if (EltVT == MVT::f32)
      MovOpc = X86ISD::MOVSS;
    else if (EltVT == MVT::f64)
      MovOpc = X86ISD::MOVSD;
    else
      llvm_unreachable("Unsupported floating point element type to handle!");
    return DAG.getNode(MovOpc, DL, ExtVT, V1, V2);
  }

  // This lowering only works for the low element with floating point vectors.
  if (VT.isFloatingPoint() && V2Index != 0)
    return SDValue();

  // Move the scalar into the low element, zeroing everything above it.
  V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
  if (ExtVT != VT)
    V2 = DAG.getBitcast(VT, V2);

  if (V2Index != 0) {
    // If we have 4 or fewer lanes (or an FP vector) we can cheaply shuffle
    // the element into position; otherwise use a byte shift left, which is
    // valid because every other element is zero.
    if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
      SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
      V2Shuffle[V2Index] = 0;
      V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
    } else {
      V2 = DAG.getBitcast(MVT::v16i8, V2);
      V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
                       DAG.getTargetConstant(
                           V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
      V2 = DAG.getBitcast(VT, V2);
    }
  }
  return V2;
}
13778 | |
13779 | |
13780 | |
13781 | |
13782 | |
/// Try to lower a broadcast of a single, truncated integer element coming
/// from a SCALAR_TO_VECTOR or BUILD_VECTOR node \p V0 with larger elements.
///
/// Requires AVX2 (asserted): truncates the source scalar (shifting first if
/// the broadcast index selects non-least-significant bits) and emits a
/// VBROADCAST of the truncated value.
static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,
                                            int BroadcastIdx,
                                            const X86Subtarget &Subtarget,
                                            SelectionDAG &DAG) {
  assert(Subtarget.hasAVX2() &&
         "We can only lower integer broadcasts with AVX2!");

  MVT EltVT = VT.getVectorElementType();
  MVT V0VT = V0.getSimpleValueType();

  assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
  assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");

  MVT V0EltVT = V0VT.getVectorElementType();
  if (!V0EltVT.isInteger())
    return SDValue();

  const unsigned EltSize = EltVT.getSizeInBits();
  const unsigned V0EltSize = V0EltVT.getSizeInBits();

  // This is only a truncation if the original element type is larger.
  if (V0EltSize <= EltSize)
    return SDValue();

  assert(((V0EltSize % EltSize) == 0) &&
         "Scalar type sizes must all be powers of 2 on x86!");

  const unsigned V0Opc = V0.getOpcode();
  const unsigned Scale = V0EltSize / EltSize;
  // Index of the wide source element containing the broadcast element.
  const unsigned V0BroadcastIdx = BroadcastIdx / Scale;

  // SCALAR_TO_VECTOR only provides operand 0; BUILD_VECTOR provides any index.
  if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
      V0Opc != ISD::BUILD_VECTOR)
    return SDValue();

  SDValue Scalar = V0.getOperand(V0BroadcastIdx);

  // If we're extracting non-least-significant bits, shift right first so the
  // subsequent truncate keeps the desired bits.
  if (const int OffsetIdx = BroadcastIdx % Scale)
    Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
                         DAG.getConstant(OffsetIdx * EltSize, DL, MVT::i8));

  return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                     DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
}
13831 | |
13832 | |
13833 | |
13834 | |
13835 | |
13836 | static bool isSingleSHUFPSMask(ArrayRef<int> Mask) { |
13837 | |
13838 | assert(Mask.size() == 4 && "Unsupported mask size!"); |
13839 | assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!"); |
13840 | assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!"); |
13841 | assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!"); |
13842 | assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!"); |
13843 | |
13844 | |
13845 | |
13846 | if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4)) |
13847 | return false; |
13848 | if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4)) |
13849 | return false; |
13850 | |
13851 | return true; |
13852 | } |
13853 | |
13854 | |
13855 | |
13856 | |
/// If we are shuffling the two 128-bit halves extracted from the same 256-bit
/// vector, rewrite as a shuffle of the wide vector followed by one extract of
/// the low half, avoiding a multi-shuffle lowering.
static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
                                             SDValue N1, ArrayRef<int> Mask,
                                             SelectionDAG &DAG) {
  MVT VT = N0.getSimpleValueType();
  assert((VT.is128BitVector() &&
          (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) &&
         "VPERM* family of shuffles requires 32-bit or 64-bit elements");

  // Both operands must be single-use extracts of the same source vector.
  if (!N0.hasOneUse() || !N1.hasOneUse() ||
      N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      N0.getOperand(0) != N1.getOperand(0))
    return SDValue();

  SDValue WideVec = N0.getOperand(0);
  MVT WideVT = WideVec.getSimpleValueType();
  if (!WideVT.is256BitVector())
    return SDValue();

  // Match extracts of each half of the wide source vector. Commute the
  // shuffle if the low-half extract is N1 rather than N0.
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
  const APInt &ExtIndex0 = N0.getConstantOperandAPInt(1);
  const APInt &ExtIndex1 = N1.getConstantOperandAPInt(1);
  if (ExtIndex1 == 0 && ExtIndex0 == NumElts)
    ShuffleVectorSDNode::commuteMask(NewMask);
  else if (ExtIndex0 != 0 || ExtIndex1 != NumElts)
    return SDValue();

  // Bail if the mask is cheap to do as a plain 128-bit SHUFPS/unpack - the
  // wide-vector form would not be an improvement there.
  if (NumElts == 4 &&
      (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask)))
    return SDValue();

  // Widen the mask with undef elements for the upper half of the result.
  NewMask.append(NumElts, -1);

  // shuf (extract X, 0), (extract X, NumElts), M
  //   --> extract (shuf X, undef, M'), 0
  SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
                                      NewMask);

  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,
                     DAG.getIntPtrConstant(0, DL));
}
13905 | |
13906 | |
13907 | |
13908 | |
13909 | |
13910 | |
/// Try to lower a splat shuffle as a broadcast of a single element.
///
/// Handles subtarget feature filtering here: MOVDDUP for v2f64 with SSE3,
/// VBROADCAST for FP types with AVX and integer types with AVX2. Walks up the
/// value chain to find a scalar or load source that can be folded into the
/// broadcast.
static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
                                       SDValue V2, ArrayRef<int> Mask,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
        (Subtarget.hasAVX() && VT.isFloatingPoint()) ||
        (Subtarget.hasAVX2() && VT.isInteger())))
    return SDValue();

  // With MOVDDUP (v2f64) we can broadcast from a register or a load; otherwise
  // broadcasting from a register requires AVX2.
  unsigned NumEltBits = VT.getScalarSizeInBits();
  unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())
                        ? X86ISD::MOVDDUP
                        : X86ISD::VBROADCAST;
  bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();

  // Check that the mask is a broadcast (splat).
  int BroadcastIdx = getSplatIndex(Mask);
  if (BroadcastIdx < 0)
    return SDValue();
  assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
                                            "a sorted mask where the broadcast "
                                            "comes from V1.");

  // Go up the chain of (vector) values to find a scalar load that we can
  // combine with the broadcast, tracking the element as a bit offset.
  int BitOffset = BroadcastIdx * NumEltBits;
  SDValue V = V1;
  for (;;) {
    switch (V.getOpcode()) {
    case ISD::BITCAST: {
      // Bitcasts don't move bits - just look through.
      V = V.getOperand(0);
      continue;
    }
    case ISD::CONCAT_VECTORS: {
      // Step into the operand that contains the offset.
      int OpBitWidth = V.getOperand(0).getValueSizeInBits();
      int OpIdx = BitOffset / OpBitWidth;
      V = V.getOperand(OpIdx);
      BitOffset %= OpBitWidth;
      continue;
    }
    case ISD::EXTRACT_SUBVECTOR: {
      // The extraction index adds to the existing bit offset.
      unsigned EltBitWidth = V.getScalarValueSizeInBits();
      unsigned Idx = V.getConstantOperandVal(1);
      unsigned BeginOffset = Idx * EltBitWidth;
      BitOffset += BeginOffset;
      V = V.getOperand(0);
      continue;
    }
    case ISD::INSERT_SUBVECTOR: {
      // Step into the inserted subvector if it covers the offset, else into
      // the outer vector.
      SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
      int EltBitWidth = VOuter.getScalarValueSizeInBits();
      int Idx = (int)V.getConstantOperandVal(2);
      int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
      int BeginOffset = Idx * EltBitWidth;
      int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
      if (BeginOffset <= BitOffset && BitOffset < EndOffset) {
        BitOffset -= BeginOffset;
        V = VInner;
      } else {
        V = VOuter;
      }
      continue;
    }
    }
    break;
  }
  assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset");
  BroadcastIdx = BitOffset / NumEltBits;

  // Do we need to bitcast the source to retrieve the original broadcast index?
  bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits;

  // If the source has larger elements than the shuffle, the broadcast element
  // is in essence truncated - make that explicit to ease folding.
  if (BitCastSrc && VT.isInteger())
    if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast(
            DL, VT, V, BroadcastIdx, Subtarget, DAG))
      return TruncBroadcast;

  // Also check the simpler case where we can directly reuse a scalar operand.
  if (!BitCastSrc &&
      ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
       (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0))) {
    V = V.getOperand(BroadcastIdx);

    // If we can't broadcast from a register, the scalar must be a load.
    if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
      return SDValue();
  } else if (ISD::isNormalLoad(V.getNode()) &&
             cast<LoadSDNode>(V)->isSimple()) {
    // Reduce the vector load and shuffle to a broadcasted scalar load at the
    // broadcast element's offset. Note: the load's one-use is deliberately
    // not checked here.
    LoadSDNode *Ld = cast<LoadSDNode>(V);
    SDValue BaseAddr = Ld->getOperand(1);
    MVT SVT = VT.getScalarType();
    unsigned Offset = BroadcastIdx * SVT.getStoreSize();
    assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
    SDValue NewAddr =
        DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL);

    // Directly form VBROADCAST_LOAD if we're using the VBROADCAST opcode
    // rather than MOVDDUP.
    if (Opcode == X86ISD::VBROADCAST) {
      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      SDValue Ops[] = {Ld->getChain(), NewAddr};
      V = DAG.getMemIntrinsicNode(
          X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
          DAG.getMachineFunction().getMachineMemOperand(
              Ld->getMemOperand(), Offset, SVT.getStoreSize()));
      DAG.makeEquivalentMemoryOrdering(Ld, V);
      return DAG.getBitcast(VT, V);
    }
    assert(SVT == MVT::f64 && "Unexpected VT!");
    V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                    DAG.getMachineFunction().getMachineMemOperand(
                        Ld->getMemOperand(), Offset, SVT.getStoreSize()));
    DAG.makeEquivalentMemoryOrdering(Ld, V);
  } else if (!BroadcastFromReg) {
    // We can't broadcast from a vector register on this subtarget.
    return SDValue();
  } else if (BitOffset != 0) {
    // We can only broadcast from the zero-element of a vector register, but
    // it can be advantageous to broadcast from the zero-element of a
    // subvector.
    if (!VT.is256BitVector() && !VT.is512BitVector())
      return SDValue();

    // VPERMQ/VPERMPD can perform the cross-lane shuffle directly.
    if (VT == MVT::v4f64 || VT == MVT::v4i64)
      return SDValue();

    // Only broadcast the zero-element of a 128-bit subvector.
    if ((BitOffset % 128) != 0)
      return SDValue();

    assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
           "Unexpected bit-offset");
    assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
           "Unexpected vector size");
    unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits();
    V = extract128BitVector(V, ExtractIdx, DAG, DL);
  }

  // On AVX we can use VBROADCAST directly for scalar sources; without it,
  // first move the scalar into a vector for MOVDDUP.
  if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector()) {
    V = DAG.getBitcast(MVT::f64, V);
    if (Subtarget.hasAVX()) {
      V = DAG.getNode(X86ISD::VBROADCAST, DL, MVT::v2f64, V);
      return DAG.getBitcast(VT, V);
    }
    V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V);
  }

  // If this is still a scalar, do the broadcast on its own type and bitcast.
  if (!V.getValueType().isVector()) {
    assert(V.getScalarValueSizeInBits() == NumEltBits &&
           "Unexpected scalar size");
    MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
                                       VT.getVectorNumElements());
    return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
  }

  // We only support broadcasting from 128-bit vectors to minimize the number
  // of isel patterns, so extract down to 128 bits, removing as many bitcasts
  // as possible.
  if (V.getValueSizeInBits() > 128)
    V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);

  // Otherwise cast V to a vector with the same element type as VT, but
  // possibly narrower than VT, then perform the broadcast.
  unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
  MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts);
  return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V));
}
14097 | |
14098 | |
14099 | |
14100 | |
14101 | |
14102 | |
14103 | |
// Check whether the 4-element shuffle can be lowered to a single INSERTPS:
// at most one element out of place (the insertion), the rest either zeroable
// or V1 elements already in place. On success the INSERTPS immediate is
// written to \p InsertPSMask and \p V1/\p V2 are updated to the operands the
// instruction should use.
static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
                                   unsigned &InsertPSMask,
                                   const APInt &Zeroable,
                                   ArrayRef<int> Mask, SelectionDAG &DAG) {
  assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
  assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Attempt to match with one element from VA or VB being inserted into a
  // base of VA (or undef). Updates V1/V2/InsertPSMask on success.
  auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
                             ArrayRef<int> CandidateMask) {
    unsigned ZMask = 0;
    int VADstIndex = -1;
    int VBDstIndex = -1;
    bool VAUsedInPlace = false;

    for (int i = 0; i < 4; ++i) {
      // Synthesize a zero mask from the zeroable elements (includes undefs).
      if (Zeroable[i]) {
        ZMask |= 1 << i;
        continue;
      }

      // Flag any VA inputs that are used in place.
      if (i == CandidateMask[i]) {
        VAUsedInPlace = true;
        continue;
      }

      // INSERTPS can only insert a single out-of-place element.
      if (VADstIndex >= 0 || VBDstIndex >= 0)
        return false;

      if (CandidateMask[i] < 4) {
        // VA input out of place for insertion.
        VADstIndex = i;
      } else {
        // VB input for insertion.
        VBDstIndex = i;
      }
    }

    // Don't bother if we have no (non-zeroable) element for insertion.
    if (VADstIndex < 0 && VBDstIndex < 0)
      return false;

    // Determine element insertion src/dst indices. The src index is relative
    // to the start of the vector being inserted from.
    unsigned VBSrcIndex = 0;
    if (VADstIndex >= 0) {
      // If we have a VA input out of place, use VA as the "inserted" operand
      // and don't use the original VB at all.
      VBSrcIndex = CandidateMask[VADstIndex];
      VBDstIndex = VADstIndex;
      VB = VA;
    } else {
      VBSrcIndex = CandidateMask[VBDstIndex] - 4;
    }

    // If no VA inputs are used in place, the result is built solely from the
    // zero mask and the insertion - drop the VA dependency entirely.
    if (!VAUsedInPlace)
      VA = DAG.getUNDEF(MVT::v4f32);

    // Commit the operands for the caller.
    V1 = VA;
    V2 = VB;

    // Encode the INSERTPS immediate: src index, dst index, zero mask.
    InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
    assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
    return true;
  };

  if (matchAsInsertPS(V1, V2, Mask))
    return true;

  // Commute the operands and mask, then try again.
  SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
  ShuffleVectorSDNode::commuteMask(CommutedMask);
  if (matchAsInsertPS(V2, V1, CommutedMask))
    return true;

  return false;
}
14191 | |
14192 | static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, |
14193 | ArrayRef<int> Mask, const APInt &Zeroable, |
14194 | SelectionDAG &DAG) { |
14195 | assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
14196 | assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
14197 | |
14198 | |
14199 | unsigned InsertPSMask = 0; |
14200 | if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG)) |
14201 | return SDValue(); |
14202 | |
14203 | |
14204 | return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, |
14205 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
14206 | } |
14207 | |
14208 | |
14209 | |
14210 | |
14211 | |
14212 | |
14213 | |
14214 | |
14215 | |
14216 | |
/// Try to lower a shuffle as a permute of the inputs followed by an UNPCK
/// instruction.
///
/// Targets cases that alternate between the two inputs and so can be permuted
/// into something that feeds a single UNPCK. Only handles 128-bit integer
/// vectors with two defined inputs (asserted below).
static SDValue lowerShuffleAsPermuteAndUnpack(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(!VT.isFloatingPoint() &&
         "This routine only supports integer vectors.");
  assert(VT.is128BitVector() &&
         "This routine only works on 128-bit vectors.");
  assert(!V2.isUndef() &&
         "This routine should only be used when blending two inputs.");
  assert(Mask.size() >= 2 && "Single element masks are invalid.");

  int Size = Mask.size();

  // Count how many mask elements reference each half of a source, to decide
  // between UNPCKL and UNPCKH.
  int NumLoInputs =
      count_if(Mask, [Size](int M) { return M >= 0 && M % Size < Size / 2; });
  int NumHiInputs =
      count_if(Mask, [Size](int M) { return M % Size >= Size / 2; });

  bool UnpackLo = NumLoInputs >= NumHiInputs;

  // Attempt the permute+unpack at a given unpack element width.
  auto TryUnpack = [&](int ScalarSize, int Scale) {
    SmallVector<int, 16> V1Mask((unsigned)Size, -1);
    SmallVector<int, 16> V2Mask((unsigned)Size, -1);

    for (int i = 0; i < Size; ++i) {
      if (Mask[i] < 0)
        continue;

      // Each element of the unpack contains Scale elements from this mask.
      int UnpackIdx = i / Scale;

      // We only handle the case where V1 feeds even unpack slots and V2 feeds
      // odd ones; rely on canonicalization for that.
      if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
        return SDValue();

      // Set up the pre-unpack permute mask for this input, accounting for the
      // unpack stride and the chosen (lo/hi) unpack half.
      SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask;
      VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
          Mask[i] % Size;
    }

    // If both inputs would need a shuffle to use the unpack, and the mask
    // only uses one half, it's better to unpack first - skip this form.
    if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
        !isNoopShuffleMask(V2Mask))
      return SDValue();

    // Shuffle the inputs into place.
    V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
    V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);

    // Cast the inputs to the element width we'll unpack at.
    MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale);
    V1 = DAG.getBitcast(UnpackVT, V1);
    V2 = DAG.getBitcast(UnpackVT, V2);

    // Unpack the inputs and cast the result back to the desired type.
    return DAG.getBitcast(
        VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
                        UnpackVT, V1, V2));
  };

  // Try each unpack width from the largest down to the element width.
  int OrigScalarSize = VT.getScalarSizeInBits();
  for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
    if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
      return Unpack;

  // If either input is a zero vector, don't emit shuffle-of-unpack: we'd lose
  // track of the known-zero elements.
  if (ISD::isBuildVectorAllZeros(V1.getNode()) ||
      ISD::isBuildVectorAllZeros(V2.getNode()))
    return SDValue();

  // If all inputs come from one half of both sources, unpack first and then
  // permute the interleaved result.
  if (NumLoInputs == 0 || NumHiInputs == 0) {
    assert((NumLoInputs > 0 || NumHiInputs > 0) &&
           "We have to have *some* inputs!");
    int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;

    // FIXME: We could consider the total complexity of the permute of each
    // possible unpacking, or at least the number of half-crossings created.

    // Build the permute mask over the interleaved (unpacked) elements: V1
    // elements land at even positions, V2 elements at odd.
    SmallVector<int, 32> PermMask((unsigned)Size, -1);
    for (int i = 0; i < Size; ++i) {
      if (Mask[i] < 0)
        continue;

      assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");

      PermMask[i] =
          2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
    }
    return DAG.getVectorShuffle(
        VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL,
                            DL, VT, V1, V2),
        DAG.getUNDEF(VT), PermMask);
  }

  return SDValue();
}
14324 | |
14325 | |
14326 | |
14327 | |
14328 | |
14329 | |
14330 | |
14331 | |
14332 | static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
14333 | const APInt &Zeroable, SDValue V1, SDValue V2, |
14334 | const X86Subtarget &Subtarget, |
14335 | SelectionDAG &DAG) { |
14336 | assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); |
14337 | assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); |
14338 | assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); |
14339 | |
14340 | if (V2.isUndef()) { |
14341 | |
14342 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2, |
14343 | Mask, Subtarget, DAG)) |
14344 | return Broadcast; |
14345 | |
14346 | |
14347 | |
14348 | unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1); |
14349 | |
14350 | if (Subtarget.hasAVX()) { |
14351 | |
14352 | |
14353 | return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, |
14354 | DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); |
14355 | } |
14356 | |
14357 | return DAG.getNode( |
14358 | X86ISD::SHUFP, DL, MVT::v2f64, |
14359 | Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, |
14360 | Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, |
14361 | DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); |
14362 | } |
14363 | assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!"); |
14364 | assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!"); |
14365 | assert(Mask[0] < 2 && "We sort V1 to be the first input."); |
14366 | assert(Mask[1] >= 2 && "We sort V2 to be the second input."); |
14367 | |
14368 | if (Subtarget.hasAVX2()) |
14369 | if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) |
14370 | return Extract; |
14371 | |
14372 | |
14373 | |
14374 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14375 | DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
14376 | return Insertion; |
14377 | |
14378 | |
14379 | int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2), |
14380 | Mask[1] < 0 ? -1 : (Mask[1] ^ 2)}; |
14381 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14382 | DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG)) |
14383 | return Insertion; |
14384 | |
14385 | |
14386 | |
14387 | if (isShuffleEquivalent(Mask, {0, 3}, V1, V2) || |
14388 | isShuffleEquivalent(Mask, {1, 3}, V1, V2)) |
14389 | if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG)) |
14390 | |
14391 | |
14392 | return DAG.getNode( |
14393 | X86ISD::MOVSD, DL, MVT::v2f64, V2, |
14394 | DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S)); |
14395 | |
14396 | if (Subtarget.hasSSE41()) |
14397 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, |
14398 | Zeroable, Subtarget, DAG)) |
14399 | return Blend; |
14400 | |
14401 | |
14402 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG)) |
14403 | return V; |
14404 | |
14405 | unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); |
14406 | return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2, |
14407 | DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); |
14408 | } |
14409 | |
14410 | |
14411 | |
14412 | |
14413 | |
14414 | |
14415 | |
14416 | static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
14417 | const APInt &Zeroable, SDValue V1, SDValue V2, |
14418 | const X86Subtarget &Subtarget, |
14419 | SelectionDAG &DAG) { |
14420 | assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); |
14421 | assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); |
14422 | assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); |
14423 | |
14424 | if (V2.isUndef()) { |
14425 | |
14426 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2, |
14427 | Mask, Subtarget, DAG)) |
14428 | return Broadcast; |
14429 | |
14430 | |
14431 | |
14432 | |
14433 | V1 = DAG.getBitcast(MVT::v4i32, V1); |
14434 | int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2), |
14435 | Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1), |
14436 | Mask[1] < 0 ? -1 : (Mask[1] * 2), |
14437 | Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)}; |
14438 | return DAG.getBitcast( |
14439 | MVT::v2i64, |
14440 | DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, |
14441 | getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG))); |
14442 | } |
14443 | assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!"); |
14444 | assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!"); |
14445 | assert(Mask[0] < 2 && "We sort V1 to be the first input."); |
14446 | assert(Mask[1] >= 2 && "We sort V2 to be the second input."); |
14447 | |
14448 | if (Subtarget.hasAVX2()) |
14449 | if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) |
14450 | return Extract; |
14451 | |
14452 | |
14453 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, |
14454 | Zeroable, Subtarget, DAG)) |
14455 | return Shift; |
14456 | |
14457 | |
14458 | |
14459 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14460 | DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
14461 | return Insertion; |
14462 | |
14463 | |
14464 | int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2}; |
14465 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14466 | DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG)) |
14467 | return Insertion; |
14468 | |
14469 | |
14470 | |
14471 | bool IsBlendSupported = Subtarget.hasSSE41(); |
14472 | if (IsBlendSupported) |
14473 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, |
14474 | Zeroable, Subtarget, DAG)) |
14475 | return Blend; |
14476 | |
14477 | |
14478 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG)) |
14479 | return V; |
14480 | |
14481 | |
14482 | |
14483 | if (Subtarget.hasSSSE3()) { |
14484 | if (Subtarget.hasVLX()) |
14485 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v2i64, V1, V2, Mask, |
14486 | Subtarget, DAG)) |
14487 | return Rotate; |
14488 | |
14489 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask, |
14490 | Subtarget, DAG)) |
14491 | return Rotate; |
14492 | } |
14493 | |
14494 | |
14495 | |
14496 | if (IsBlendSupported) |
14497 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v2i64, V1, V2, Mask, |
14498 | Subtarget, DAG); |
14499 | |
14500 | |
14501 | |
14502 | |
14503 | |
14504 | V1 = DAG.getBitcast(MVT::v2f64, V1); |
14505 | V2 = DAG.getBitcast(MVT::v2f64, V2); |
14506 | return DAG.getBitcast(MVT::v2i64, |
14507 | DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); |
14508 | } |
14509 | |
14510 | |
14511 | |
14512 | |
14513 | |
14514 | |
/// Lower a 4-lane shuffle with SHUFPS when at least one element comes from V2.
///
/// SHUFPS draws its two low result lanes from its first source and its two
/// high result lanes from its second source, so masks that mix V1 and V2
/// elements within the same half need massaging (sometimes via an auxiliary
/// SHUFP) before a single final SHUFP can be emitted. No attempt is made to
/// verify this is the *best* lowering; it simply uses SHUFPS.
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
                                      ArrayRef<int> Mask, SDValue V1,
                                      SDValue V2, SelectionDAG &DAG) {
  SDValue LowV = V1, HighV = V2;
  SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 1) {
    int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();

    // Compute the index adjacent to V2Index in the same half by toggling the
    // low bit.
    int V2AdjIndex = V2Index ^ 1;

    if (Mask[V2AdjIndex] < 0) {
      // The adjacent lane is undef, so the lone V2 element can supply its
      // half by itself: make V2 feed the half containing V2Index (swapping
      // the source roles when that half is the low one) and rebase the index.
      if (V2Index < 2)
        std::swap(LowV, HighV);
      NewMask[V2Index] -= 4;
    } else {
      // The V2 element sits next to a V1 element. First blend exactly those
      // two elements into the low half of a temporary vector: the V2 pick
      // lands in lane 0, the V1 pick in lane 2.
      int V1Index = V2AdjIndex;
      int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
      V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // Then build the final shuffle: the temporary supplies the half that
      // holds the V2 element, V1 supplies the other half.
      if (V2Index < 2) {
        LowV = V2;
        HighV = V1;
      } else {
        HighV = V2;
      }
      NewMask[V1Index] = 2; // The V1 pick ended up in lane 2 of the blend.
      NewMask[V2Index] = 0; // The V2 pick ended up in lane 0 of the blend.
    }
  } else if (NumV2Elements == 2) {
    if (Mask[0] < 4 && Mask[1] < 4) {
      // Easy case: V1 feeds the low half and V2 the high half — exactly the
      // operand shape SHUFPS wants. Just rebase the V2 indices.
      NewMask[2] -= 4;
      NewMask[3] -= 4;
    } else if (Mask[2] < 4 && Mask[3] < 4) {
      // Reversed case: V2 feeds the low half and V1 the high half. Swap the
      // source roles and rebase the (now low) V2 indices.
      NewMask[0] -= 4;
      NewMask[1] -= 4;
      HighV = V1;
      LowV = V2;
    } else {
      // V1 and V2 elements are interleaved within each half. Gather the V1
      // pick of each half into lanes 0/1 and the V2 pick of each half into
      // lanes 2/3 of a temporary via an auxiliary SHUFP...
      int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
                          Mask[2] < 4 ? Mask[2] : Mask[3],
                          (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
                          (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
      V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // ...then shuffle that single temporary into final position, choosing
      // per half whether the V1 pick or the V2 pick comes first.
      LowV = HighV = V1;
      NewMask[0] = Mask[0] < 4 ? 0 : 2;
      NewMask[1] = Mask[0] < 4 ? 2 : 0;
      NewMask[2] = Mask[2] < 4 ? 1 : 3;
      NewMask[3] = Mask[2] < 4 ? 3 : 1;
    }
  } else if (NumV2Elements == 3) {
    // Mostly-V2 masks are easier handled commuted: swap the operands, commute
    // the mask, and recurse (the recursion sees NumV2Elements == 1).
    ShuffleVectorSDNode::commuteMask(NewMask);
    return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG);
  }
  return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
                     getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}
14601 | |
14602 | |
14603 | |
14604 | |
14605 | |
14606 | |
/// Lower 4-lane 32-bit floating point shuffles.
///
/// Tries a sequence of lowering strategies in decreasing order of preference,
/// falling back to a generic SHUFPS-based lowering at the end.
static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 0) {
    // Single-input shuffle: check for a broadcast first.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Use even/odd duplicate instructions for masks that match their pattern.
    if (Subtarget.hasSSE3()) {
      if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
        return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
      if (isShuffleEquivalent(Mask, {1, 1, 3, 3}, V1, V2))
        return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
    }

    if (Subtarget.hasAVX()) {
      // With AVX, prefer VPERMILPI: it permutes a single source operand,
      // which allows a load to be folded into the shuffle.
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
                         getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
    }

    // On SSE1 (no SSE2), use MOVLHPS/MOVHLPS with V1 as both sources for the
    // half-duplicating unary masks.
    if (!Subtarget.hasSSE2()) {
      if (isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2))
        return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
      if (isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1, V2))
        return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
    }

    // Otherwise a plain SHUFPS of V1 with itself handles any unary mask.
    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }

  if (Subtarget.hasAVX2())
    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
      return Extract;

  // When exactly one element comes from V2 and it lands in lane 0, try
  // lowering as an element insertion.
  if (NumV2Elements == 1 && Mask[0] >= 4)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  if (Subtarget.hasSSE41()) {
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

    // Try INSERTPS-based lowering (handles zeroing via Zeroable).
    if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
      return V;

    // If the mask can't be expressed as a single SHUFPS anyway, try a blend
    // followed by a permute before falling through.
    if (!isSingleSHUFPSMask(Mask))
      if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
                                                            V2, Mask, DAG))
        return BlendPerm;
  }

  // On SSE1 (no SSE2), match the binary MOVLHPS/MOVHLPS patterns directly.
  if (!Subtarget.hasSSE2()) {
    if (isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2))
      return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
    if (isShuffleEquivalent(Mask, {2, 3, 6, 7}, V1, V2))
      return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
  }

  // Use an unpck instruction when the mask matches one exactly.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
    return V;

  // Fall back to the generic SHUFPS lowering strategy.
  return lowerShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
14698 | |
14699 | |
14700 | |
14701 | |
14702 | |
/// Lower 4-lane i32 vector shuffles.
///
/// Tries integer-domain strategies in priority order and, when none fit,
/// falls back to performing the shuffle in the floating-point domain.
static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // If the shuffle is really a zero/any-extension, lower it as such before
  // anything else.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 0) {
    // Single-input shuffle: try a broadcast, but only when more than one
    // mask lane actually references the input.
    if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
      if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
                                                      Mask, Subtarget, DAG))
        return Broadcast;
    }

    // Canonicalize masks equivalent to in-place unpacks onto the exact
    // {0,0,1,1}/{2,2,3,3} patterns before building the PSHUFD immediate.
    const int UnpackLoMask[] = {0, 0, 1, 1};
    const int UnpackHiMask[] = {2, 2, 3, 3};
    if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
      Mask = UnpackLoMask;
    else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
      Mask = UnpackHiMask;

    // Any remaining single-input v4i32 mask fits in one PSHUFD.
    return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }

  if (Subtarget.hasAVX2())
    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
      return Extract;

  // Try a shift-based lowering (the helper decides applicability).
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // With a single V2 element, try lowering as an element insertion.
  if (NumV2Elements == 1)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  // SSE4.1 provides real blend instructions; remember the capability since
  // it also gates the decomposed-merge strategy below.
  bool IsBlendSupported = Subtarget.hasSSE41();
  if (IsBlendSupported)
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

  // Try masking out lanes with a bit mask (uses Zeroable information).
  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
                                             Zeroable, Subtarget, DAG))
    return Masked;

  // Use an unpck instruction when the mask matches one exactly.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
    return V;

  // Rotations: VALIGN first when AVX512VL is available, then a byte rotate.
  if (Subtarget.hasSSSE3()) {
    if (Subtarget.hasVLX())
      if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
                                                Subtarget, DAG))
        return Rotate;

    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
                                                  Subtarget, DAG))
      return Rotate;
  }

  // If the mask can't be done as a single (float-domain) SHUFPS anyway, try
  // the remaining integer-domain multi-instruction strategies.
  if (!isSingleSHUFPSMask(Mask)) {
    // With blends, decompose into per-input shuffles plus a merge.
    if (IsBlendSupported)
      return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i32, V1, V2, Mask,
                                                  Subtarget, DAG);

    // Otherwise try permuting the inputs into an unpack-able arrangement.
    if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2,
                                                        Mask, Subtarget, DAG))
      return Unpack;
  }

  // Last resort: perform the shuffle in the floating-point domain (v4f32)
  // and bitcast the result back to v4i32.
  SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
  SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
  SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
  return DAG.getBitcast(MVT::v4i32, ShufPS);
}
14814 | |
14815 | |
14816 | |
14817 | |
14818 | |
14819 | |
14820 | |
14821 | |
14822 | |
14823 | |
14824 | |
14825 | |
14826 | |
14827 | |
14828 | |
14829 | |
14830 | |
14831 | static SDValue lowerV8I16GeneralSingleInputShuffle( |
14832 | const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask, |
14833 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
14834 | assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); |
14835 | MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); |
14836 | |
14837 | assert(Mask.size() == 8 && "Shuffle mask length doesn't match!"); |
14838 | MutableArrayRef<int> LoMask = Mask.slice(0, 4); |
14839 | MutableArrayRef<int> HiMask = Mask.slice(4, 4); |
14840 | |
14841 | |
14842 | if (isUndefOrInRange(LoMask, 0, 4) && |
14843 | isSequentialOrUndefInRange(HiMask, 0, 4, 4)) { |
14844 | return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, |
14845 | getV4X86ShuffleImm8ForMask(LoMask, DL, DAG)); |
14846 | } |
14847 | if (isUndefOrInRange(HiMask, 4, 8) && |
14848 | isSequentialOrUndefInRange(LoMask, 0, 4, 0)) { |
14849 | for (int i = 0; i != 4; ++i) |
14850 | HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4)); |
14851 | return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, |
14852 | getV4X86ShuffleImm8ForMask(HiMask, DL, DAG)); |
14853 | } |
14854 | |
14855 | SmallVector<int, 4> LoInputs; |
14856 | copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; }); |
14857 | array_pod_sort(LoInputs.begin(), LoInputs.end()); |
14858 | LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); |
14859 | SmallVector<int, 4> HiInputs; |
14860 | copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; }); |
14861 | array_pod_sort(HiInputs.begin(), HiInputs.end()); |
14862 | HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); |
14863 | int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin(); |
14864 | int NumHToL = LoInputs.size() - NumLToL; |
14865 | int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin(); |
14866 | int NumHToH = HiInputs.size() - NumLToH; |
14867 | MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL); |
14868 | MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH); |
14869 | MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL); |
14870 | MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH); |
14871 | |
14872 | |
14873 | |
14874 | |
14875 | auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask, |
14876 | ArrayRef<int> PSHUFDMask, unsigned ShufWOp) { |
14877 | V = DAG.getNode(ShufWOp, DL, VT, V, |
14878 | getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); |
14879 | V = DAG.getBitcast(PSHUFDVT, V); |
14880 | V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V, |
14881 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); |
14882 | return DAG.getBitcast(VT, V); |
14883 | }; |
14884 | |
14885 | if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) { |
14886 | int PSHUFDMask[4] = { -1, -1, -1, -1 }; |
14887 | SmallVector<std::pair<int, int>, 4> DWordPairs; |
14888 | int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2); |
14889 | |
14890 | |
14891 | for (int DWord = 0; DWord != 4; ++DWord) { |
14892 | int M0 = Mask[2 * DWord + 0]; |
14893 | int M1 = Mask[2 * DWord + 1]; |
14894 | M0 = (M0 >= 0 ? M0 % 4 : M0); |
14895 | M1 = (M1 >= 0 ? M1 % 4 : M1); |
14896 | if (M0 < 0 && M1 < 0) |
14897 | continue; |
14898 | |
14899 | bool Match = false; |
14900 | for (int j = 0, e = DWordPairs.size(); j < e; ++j) { |
14901 | auto &DWordPair = DWordPairs[j]; |
14902 | if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) && |
14903 | (M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) { |
14904 | DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first); |
14905 | DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second); |
14906 | PSHUFDMask[DWord] = DOffset + j; |
14907 | Match = true; |
14908 | break; |
14909 | } |
14910 | } |
14911 | if (!Match) { |
14912 | PSHUFDMask[DWord] = DOffset + DWordPairs.size(); |
14913 | DWordPairs.push_back(std::make_pair(M0, M1)); |
14914 | } |
14915 | } |
14916 | |
14917 | if (DWordPairs.size() <= 2) { |
14918 | DWordPairs.resize(2, std::make_pair(-1, -1)); |
14919 | int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second, |
14920 | DWordPairs[1].first, DWordPairs[1].second}; |
14921 | if ((NumHToL + NumHToH) == 0) |
14922 | return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW); |
14923 | if ((NumLToL + NumLToH) == 0) |
14924 | return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW); |
14925 | } |
14926 | } |
14927 | |
14928 | |
14929 | |
14930 | |
14931 | |
14932 | |
14933 | |
14934 | |
14935 | |
14936 | |
14937 | |
14938 | |
14939 | |
14940 | |
14941 | |
14942 | |
14943 | |
14944 | |
14945 | |
14946 | |
14947 | |
14948 | |
14949 | |
14950 | |
14951 | |
14952 | |
14953 | |
14954 | |
14955 | |
14956 | |
14957 | |
14958 | auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs, |
14959 | ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs, |
14960 | int AOffset, int BOffset) { |
14961 | assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) && |
14962 | "Must call this with A having 3 or 1 inputs from the A half."); |
14963 | assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) && |
14964 | "Must call this with B having 1 or 3 inputs from the B half."); |
14965 | assert(AToAInputs.size() + BToAInputs.size() == 4 && |
14966 | "Must call this with either 3:1 or 1:3 inputs (summing to 4)."); |
14967 | |
14968 | bool ThreeAInputs = AToAInputs.size() == 3; |
14969 | |
14970 | |
14971 | |
14972 | |
14973 | |
14974 | int ADWord = 0, BDWord = 0; |
14975 | int &TripleDWord = ThreeAInputs ? ADWord : BDWord; |
14976 | int &OneInputDWord = ThreeAInputs ? BDWord : ADWord; |
14977 | int TripleInputOffset = ThreeAInputs ? AOffset : BOffset; |
14978 | ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs; |
14979 | int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0]; |
14980 | int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset); |
14981 | int TripleNonInputIdx = |
14982 | TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); |
14983 | TripleDWord = TripleNonInputIdx / 2; |
14984 | |
14985 | |
14986 | |
14987 | OneInputDWord = (OneInput / 2) ^ 1; |
14988 | |
14989 | |
14990 | |
14991 | |
14992 | |
14993 | |
14994 | if (BToBInputs.size() == 2 && AToBInputs.size() == 2) { |
14995 | |
14996 | |
14997 | |
14998 | |
14999 | int NumFlippedAToBInputs = |
15000 | std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) + |
15001 | std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1); |
15002 | int NumFlippedBToBInputs = |
15003 | std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) + |
15004 | std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1); |
15005 | if ((NumFlippedAToBInputs == 1 && |
15006 | (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) || |
15007 | (NumFlippedBToBInputs == 1 && |
15008 | (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) { |
15009 | |
15010 | |
15011 | |
15012 | |
15013 | auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord, |
15014 | ArrayRef<int> Inputs) { |
15015 | int FixIdx = PinnedIdx ^ 1; |
15016 | bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1); |
15017 | |
15018 | |
15019 | |
15020 | int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord)); |
15021 | bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx); |
15022 | if (IsFixIdxInput == IsFixFreeIdxInput) |
15023 | FixFreeIdx += 1; |
15024 | IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx); |
15025 | assert(IsFixIdxInput != IsFixFreeIdxInput && |
15026 | "We need to be changing the number of flipped inputs!"); |
15027 | int PSHUFHalfMask[] = {0, 1, 2, 3}; |
15028 | std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]); |
15029 | V = DAG.getNode( |
15030 | FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, |
15031 | MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V, |
15032 | getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); |
15033 | |
15034 | for (int &M : Mask) |
15035 | if (M >= 0 && M == FixIdx) |
15036 | M = FixFreeIdx; |
15037 | else if (M >= 0 && M == FixFreeIdx) |
15038 | M = FixIdx; |
15039 | }; |
15040 | if (NumFlippedBToBInputs != 0) { |
15041 | int BPinnedIdx = |
15042 | BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput; |
15043 | FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs); |
15044 | } else { |
15045 | assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!"); |
15046 | int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput; |
15047 | FixFlippedInputs(APinnedIdx, ADWord, AToBInputs); |
15048 | } |
15049 | } |
15050 | } |
15051 | |
15052 | int PSHUFDMask[] = {0, 1, 2, 3}; |
15053 | PSHUFDMask[ADWord] = BDWord; |
15054 | PSHUFDMask[BDWord] = ADWord; |
15055 | V = DAG.getBitcast( |
15056 | VT, |
15057 | DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), |
15058 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
15059 | |
15060 | |
15061 | for (int &M : Mask) |
15062 | if (M >= 0 && M/2 == ADWord) |
15063 | M = 2 * BDWord + M % 2; |
15064 | else if (M >= 0 && M/2 == BDWord) |
15065 | M = 2 * ADWord + M % 2; |
15066 | |
15067 | |
15068 | |
15069 | return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG); |
15070 | }; |
15071 | if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3)) |
15072 | return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4); |
15073 | if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3)) |
15074 | return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0); |
15075 | |
15076 | |
15077 | |
15078 | |
15079 | |
15080 | |
15081 | int PSHUFLMask[4] = {-1, -1, -1, -1}; |
15082 | int PSHUFHMask[4] = {-1, -1, -1, -1}; |
15083 | int PSHUFDMask[4] = {-1, -1, -1, -1}; |
15084 | |
15085 | |
15086 | |
15087 | |
15088 | auto fixInPlaceInputs = |
15089 | [&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs, |
15090 | MutableArrayRef<int> SourceHalfMask, |
15091 | MutableArrayRef<int> HalfMask, int HalfOffset) { |
15092 | if (InPlaceInputs.empty()) |
15093 | return; |
15094 | if (InPlaceInputs.size() == 1) { |
15095 | SourceHalfMask[InPlaceInputs[0] - HalfOffset] = |
15096 | InPlaceInputs[0] - HalfOffset; |
15097 | PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2; |
15098 | return; |
15099 | } |
15100 | if (IncomingInputs.empty()) { |
15101 | |
15102 | for (int Input : InPlaceInputs) { |
15103 | SourceHalfMask[Input - HalfOffset] = Input - HalfOffset; |
15104 | PSHUFDMask[Input / 2] = Input / 2; |
15105 | } |
15106 | return; |
15107 | } |
15108 | |
15109 | assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!"); |
15110 | SourceHalfMask[InPlaceInputs[0] - HalfOffset] = |
15111 | InPlaceInputs[0] - HalfOffset; |
15112 | |
15113 | |
15114 | int AdjIndex = InPlaceInputs[0] ^ 1; |
15115 | SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset; |
15116 | std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex); |
15117 | PSHUFDMask[AdjIndex / 2] = AdjIndex / 2; |
15118 | }; |
15119 | fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0); |
15120 | fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4); |
15121 | |
15122 | |
15123 | |
15124 | |
15125 | |
15126 | auto moveInputsToRightHalf = [&PSHUFDMask]( |
15127 | MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs, |
15128 | MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask, |
15129 | MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset, |
15130 | int DestOffset) { |
15131 | auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) { |
15132 | return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word; |
15133 | }; |
15134 | auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask, |
15135 | int Word) { |
15136 | int LowWord = Word & ~1; |
15137 | int HighWord = Word | 1; |
15138 | return isWordClobbered(SourceHalfMask, LowWord) || |
15139 | isWordClobbered(SourceHalfMask, HighWord); |
15140 | }; |
15141 | |
15142 | if (IncomingInputs.empty()) |
15143 | return; |
15144 | |
15145 | if (ExistingInputs.empty()) { |
15146 | |
15147 | for (int Input : IncomingInputs) { |
15148 | |
15149 | |
15150 | if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) { |
15151 | if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) { |
15152 | SourceHalfMask[SourceHalfMask[Input - SourceOffset]] = |
15153 | Input - SourceOffset; |
15154 | |
15155 | for (int &M : HalfMask) |
15156 | if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset) |
15157 | M = Input; |
15158 | else if (M == Input) |
15159 | M = SourceHalfMask[Input - SourceOffset] + SourceOffset; |
15160 | } else { |
15161 | assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == |
15162 | Input - SourceOffset && |
15163 | "Previous placement doesn't match!"); |
15164 | } |
15165 | |
15166 | |
15167 | |
15168 | Input = SourceHalfMask[Input - SourceOffset] + SourceOffset; |
15169 | } |
15170 | |
15171 | |
15172 | if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0) |
15173 | PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2; |
15174 | else |
15175 | assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == |
15176 | Input / 2 && |
15177 | "Previous placement doesn't match!"); |
15178 | } |
15179 | |
15180 | |
15181 | |
15182 | |
15183 | for (int &M : HalfMask) |
15184 | if (M >= SourceOffset && M < SourceOffset + 4) { |
15185 | M = M - SourceOffset + DestOffset; |
15186 | assert(M >= 0 && "This should never wrap below zero!"); |
15187 | } |
15188 | return; |
15189 | } |
15190 | |
15191 | |
15192 | |
15193 | |
15194 | if (IncomingInputs.size() == 1) { |
15195 | if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { |
15196 | int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) + |
15197 | SourceOffset; |
15198 | SourceHalfMask[InputFixed - SourceOffset] = |
15199 | IncomingInputs[0] - SourceOffset; |
15200 | std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0], |
15201 | InputFixed); |
15202 | IncomingInputs[0] = InputFixed; |
15203 | } |
15204 | } else if (IncomingInputs.size() == 2) { |
15205 | if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 || |
15206 | isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { |
15207 | |
15208 | |
15209 | |
15210 | int InputsFixed[2] = {IncomingInputs[0] - SourceOffset, |
15211 | IncomingInputs[1] - SourceOffset}; |
15212 | |
15213 | |
15214 | |
15215 | |
15216 | if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) && |
15217 | SourceHalfMask[InputsFixed[0] ^ 1] < 0) { |
15218 | SourceHalfMask[InputsFixed[0]] = InputsFixed[0]; |
15219 | SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1]; |
15220 | InputsFixed[1] = InputsFixed[0] ^ 1; |
15221 | } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) && |
15222 | SourceHalfMask[InputsFixed[1] ^ 1] < 0) { |
15223 | SourceHalfMask[InputsFixed[1]] = InputsFixed[1]; |
15224 | SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0]; |
15225 | InputsFixed[0] = InputsFixed[1] ^ 1; |
15226 | } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 && |
15227 | SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) { |
15228 | |
15229 | |
15230 | |
15231 | SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0]; |
15232 | SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1]; |
15233 | InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1); |
15234 | InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1; |
15235 | } else { |
15236 | |
15237 | |
15238 | |
15239 | |
15240 | for (int i = 0; i < 4; ++i) |
15241 | assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) && |
15242 | "We can't handle any clobbers here!"); |
15243 | assert(InputsFixed[1] != (InputsFixed[0] ^ 1) && |
15244 | "Cannot have adjacent inputs here!"); |
15245 | |
15246 | SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1]; |
15247 | SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1; |
15248 | |
15249 | |
15250 | |
15251 | for (int &M : FinalSourceHalfMask) |
15252 | if (M == (InputsFixed[0] ^ 1) + SourceOffset) |
15253 | M = InputsFixed[1] + SourceOffset; |
15254 | else if (M == InputsFixed[1] + SourceOffset) |
15255 | M = (InputsFixed[0] ^ 1) + SourceOffset; |
15256 | |
15257 | InputsFixed[1] = InputsFixed[0] ^ 1; |
15258 | } |
15259 | |
15260 | |
15261 | for (int &M : HalfMask) |
15262 | if (M == IncomingInputs[0]) |
15263 | M = InputsFixed[0] + SourceOffset; |
15264 | else if (M == IncomingInputs[1]) |
15265 | M = InputsFixed[1] + SourceOffset; |
15266 | |
15267 | IncomingInputs[0] = InputsFixed[0] + SourceOffset; |
15268 | IncomingInputs[1] = InputsFixed[1] + SourceOffset; |
15269 | } |
15270 | } else { |
15271 | llvm_unreachable("Unhandled input size!"); |
15272 | } |
15273 | |
15274 | |
15275 | int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2; |
15276 | assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free"); |
15277 | PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2; |
15278 | for (int &M : HalfMask) |
15279 | for (int Input : IncomingInputs) |
15280 | if (M == Input) |
15281 | M = FreeDWord * 2 + Input % 2; |
15282 | }; |
15283 | moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask, |
15284 | 4, 0); |
15285 | moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask, |
15286 | 0, 4); |
15287 | |
15288 | |
15289 | |
15290 | if (!isNoopShuffleMask(PSHUFLMask)) |
15291 | V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, |
15292 | getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG)); |
15293 | if (!isNoopShuffleMask(PSHUFHMask)) |
15294 | V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, |
15295 | getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG)); |
15296 | if (!isNoopShuffleMask(PSHUFDMask)) |
15297 | V = DAG.getBitcast( |
15298 | VT, |
15299 | DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), |
15300 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
15301 | |
15302 | |
15303 | |
15304 | assert(count_if(LoMask, [](int M) { return M >= 4; }) == 0 && |
15305 | "Failed to lift all the high half inputs to the low mask!"); |
15306 | assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 && |
15307 | "Failed to lift all the low half inputs to the high mask!"); |
15308 | |
15309 | |
15310 | if (!isNoopShuffleMask(LoMask)) |
15311 | V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, |
15312 | getV4X86ShuffleImm8ForMask(LoMask, DL, DAG)); |
15313 | |
15314 | |
15315 | for (int &M : HiMask) |
15316 | if (M >= 0) |
15317 | M -= 4; |
15318 | if (!isNoopShuffleMask(HiMask)) |
15319 | V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, |
15320 | getV4X86ShuffleImm8ForMask(HiMask, DL, DAG)); |
15321 | |
15322 | return V; |
15323 | } |
15324 | |
15325 | |
15326 | |
/// Lower a shuffle as a blend of two PSHUFB-permuted inputs.
///
/// Builds a per-byte PSHUFB control vector for each of \p V1 and \p V2 (the
/// 0x80 sentinel zeroes a byte lane), applies PSHUFB to whichever inputs are
/// actually referenced by \p Mask, and ORs the two results together. The
/// out-parameters \p V1InUse / \p V2InUse report which inputs were needed so
/// the caller can judge whether this lowering is profitable.
/// \pre The mask must not cross 128-bit lanes (PSHUFB shuffles within lanes).
static SDValue lowerShuffleAsBlendOfPSHUFBs(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
  assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
         "Lane crossing shuffle masks not supported");

  int NumBytes = VT.getSizeInBits() / 8;
  int Size = Mask.size();
  int Scale = NumBytes / Size; // Bytes covered by one shuffle-mask element.

  SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8));
  SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8));
  V1InUse = false;
  V2InUse = false;

  // Expand each mask element into Scale byte selectors, routing each byte to
  // exactly one of the two PSHUFB masks and zeroing the other side.
  for (int i = 0; i < NumBytes; ++i) {
    int M = Mask[i / Scale];
    if (M < 0)
      continue;

    const int ZeroMask = 0x80; // PSHUFB: high bit set -> write zero.
    int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
    int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
    if (Zeroable[i / Scale])
      V1Idx = V2Idx = ZeroMask;

    V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
    V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
    V1InUse |= (ZeroMask != V1Idx);
    V2InUse |= (ZeroMask != V2Idx);
  }

  // Emit a PSHUFB only for inputs that contribute at least one byte.
  MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes);
  if (V1InUse)
    V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1),
                     DAG.getBuildVector(ShufVT, DL, V1Mask));
  if (V2InUse)
    V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2),
                     DAG.getBuildVector(ShufVT, DL, V2Mask));

  // If both inputs were shuffled, blend them with an OR (each byte lane is
  // non-zero in at most one of the two shuffled values).
  SDValue V;
  if (V1InUse && V2InUse)
    V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2);
  else
    V = V1InUse ? V1 : V2;

  // Cast the result back to the requested vector type.
  return DAG.getBitcast(VT, V);
}
15377 | |
15378 | |
15379 | |
15380 | |
15381 | |
15382 | |
15383 | |
15384 | |
15385 | |
15386 | |
15387 | |
15388 | |
15389 | |
/// Generic lowering for v8i16 shuffles.
///
/// Tries a sequence of increasingly general strategies in a fixed order; the
/// first one that produces a value wins, so the ordering below encodes the
/// preference between lowerings. Two-input shuffles fall through to a
/// blend/unpack/pack cascade and finally to a decomposed shuffle-then-merge.
static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  // Try a zero/any-extension based lowering first.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Try to use a lane-truncating VPMOV lowering.
  if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
                                        Subtarget, DAG))
    return V;

  int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });

  if (NumV2Inputs == 0) {
    // Single-input shuffle: try shift (the V1,V1 form) first.
    if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
                                            Zeroable, Subtarget, DAG))
      return Shift;

    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Try to use bit rotation instructions.
    if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask,
                                                 Subtarget, DAG))
      return Rotate;

    // Use dedicated unpack instructions where possible.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
      return V;

    // Use dedicated pack instructions where possible.
    if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
                                         Subtarget))
      return V;

    // Try to use byte rotation instructions.
    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask,
                                                  Subtarget, DAG))
      return Rotate;

    // Fall back to the general single-input lowering, which may mutate the
    // mask, hence the local copy.
    SmallVector<int, 8> MutableMask(Mask.begin(), Mask.end());
    return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v8i16, V1, MutableMask,
                                               Subtarget, DAG);
  }

  assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) &&
         "All single-input shuffles should be canonicalized to be V1-input "
         "shuffles.");

  // Try to use shift instructions (the two-input form this time).
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // See if we can use SSE4A extract/insert (EXTRQ/INSERTQ).
  if (Subtarget.hasSSE4A())
    if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
                                          Zeroable, DAG))
      return V;

  // A lone V2 element can be lowered as an element insertion.
  if (NumV2Inputs == 1)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  // Integer blends are only available on SSE4.1 and later; remember the
  // result because the PSHUFB fallback below depends on it.
  bool IsBlendSupported = Subtarget.hasSSE41();
  if (IsBlendSupported)
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
                                             Zeroable, Subtarget, DAG))
    return Masked;

  // Use dedicated unpack instructions where possible.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions where possible.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use lane-truncation.
  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  if (SDValue BitBlend =
          lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
    return BitBlend;

  // Try to use byte shift instructions to mask.
  if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v8i16, V1, V2, Mask,
                                              Zeroable, Subtarget, DAG))
    return V;

  // Attempt to lower using compaction: clear the odd (or odd+even) words of
  // both inputs and PACKUS them together, dropping the cleared elements.
  // Restricted to SSE4.1+ and to subtargets without VLX (which have better
  // truncation options handled above).
  int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false);
  if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
      !Subtarget.hasVLX()) {
    // Keep only every 2^(NumEvenDrops-1)-th dword of each input.
    SmallVector<SDValue, 8> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
    for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
      DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
    SDValue DWordClearMask = DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
    V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
                     DWordClearMask);
    V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
                     DWordClearMask);
    // Now pack things back together. A second pack drops one more level.
    SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2);
    if (NumEvenDrops == 2) {
      Result = DAG.getBitcast(MVT::v4i32, Result);
      Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, Result, Result);
    }
    return Result;
  }

  // Try to lower by permuting the inputs into an unpack instruction.
  if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
                                                      Mask, Subtarget, DAG))
    return Unpack;

  // If we can't directly blend but can use PSHUFB, that will be better as it
  // can both shuffle and set up the inefficient blend.
  if (!IsBlendSupported && Subtarget.hasSSSE3()) {
    bool V1InUse, V2InUse;
    return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
                                        Zeroable, DAG, V1InUse, V2InUse);
  }

  // We can always bit-blend if we have to, so fall back to decomposing into
  // a permute of each input followed by a fixed merge of the two.
  return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i16, V1, V2,
                                              Mask, Subtarget, DAG);
}
15549 | |
15550 | |
15551 | static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
15552 | const APInt &Zeroable, SDValue V1, SDValue V2, |
15553 | const X86Subtarget &Subtarget, |
15554 | SelectionDAG &DAG) { |
15555 | assert(V1.getSimpleValueType() == MVT::v8f16 && "Bad operand type!"); |
15556 | assert(V2.getSimpleValueType() == MVT::v8f16 && "Bad operand type!"); |
15557 | assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); |
15558 | int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; }); |
15559 | |
15560 | if (NumV2Elements == 0) { |
15561 | |
15562 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2, |
15563 | Mask, Subtarget, DAG)) |
15564 | return Broadcast; |
15565 | } |
15566 | if (NumV2Elements == 1 && Mask[0] >= 8) |
15567 | if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8f16, V1, V2, Mask, |
15568 | Zeroable, Subtarget, DAG)) |
15569 | return V; |
15570 | |
15571 | V1 = DAG.getBitcast(MVT::v8i16, V1); |
15572 | V2 = DAG.getBitcast(MVT::v8i16, V2); |
15573 | return DAG.getBitcast(MVT::v8f16, |
15574 | DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, Mask)); |
15575 | } |
15576 | |
15577 | |
15578 | |
15579 | |
15580 | static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, |
15581 | ArrayRef<int> Mask, SDValue V1, SDValue V2, |
15582 | const X86Subtarget &Subtarget, |
15583 | SelectionDAG &DAG) { |
15584 | MVT MaskVT = VT.changeTypeToInteger(); |
15585 | SDValue MaskNode; |
15586 | MVT ShuffleVT = VT; |
15587 | if (!VT.is512BitVector() && !Subtarget.hasVLX()) { |
15588 | V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512); |
15589 | V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512); |
15590 | ShuffleVT = V1.getSimpleValueType(); |
15591 | |
15592 | |
15593 | int NumElts = VT.getVectorNumElements(); |
15594 | unsigned Scale = 512 / VT.getSizeInBits(); |
15595 | SmallVector<int, 32> AdjustedMask(Mask.begin(), Mask.end()); |
15596 | for (int &M : AdjustedMask) |
15597 | if (NumElts <= M) |
15598 | M += (Scale - 1) * NumElts; |
15599 | MaskNode = getConstVector(AdjustedMask, MaskVT, DAG, DL, true); |
15600 | MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); |
15601 | } else { |
15602 | MaskNode = getConstVector(Mask, MaskVT, DAG, DL, true); |
15603 | } |
15604 | |
15605 | SDValue Result; |
15606 | if (V2.isUndef()) |
15607 | Result = DAG.getNode(X86ISD::VPERMV, DL, ShuffleVT, MaskNode, V1); |
15608 | else |
15609 | Result = DAG.getNode(X86ISD::VPERMV3, DL, ShuffleVT, V1, MaskNode, V2); |
15610 | |
15611 | if (VT != ShuffleVT) |
15612 | Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits()); |
15613 | |
15614 | return Result; |
15615 | } |
15616 | |
15617 | |
15618 | |
15619 | |
15620 | |
15621 | |
15622 | |
15623 | |
/// Generic lowering for v16i8 shuffles.
///
/// This is the most complicated of the 128-bit lowerings: byte shuffles have
/// the fewest dedicated instructions, so many strategies are attempted in a
/// fixed preference order, ending with a split into two v8i16 half shuffles
/// that are PACKUSed back together.
static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Use dedicated pack instructions where possible.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try a zero/any-extension based lowering.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Try lane truncation (VPMOV / VTRUNC style) lowerings.
  if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
                                        Subtarget, DAG))
    return V;

  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // See if we can use SSE4A extract/insert (EXTRQ/INSERTQ).
  if (Subtarget.hasSSE4A())
    if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
                                          Zeroable, DAG))
      return V;

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });

  // Single-input lowerings.
  if (NumV2Elements == 0) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Try to use bit rotation instructions.
    if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask,
                                                 Subtarget, DAG))
      return Rotate;

    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
      return V;

    // If every pair of mask bytes agrees (or is undef), the shuffle can be
    // widened: do an i16 pre-shuffle to group the needed bytes, duplicate
    // them across even/odd byte positions with an unpack, then finish with
    // an i16 post-shuffle. The helpers below implement that plan.

    // Check whether each byte pair requests a single source byte (or is
    // undef); only then can the v8i16 widening trick apply.
    auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
      for (int i = 0; i < 16; i += 2)
        if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1])
          return false;

      return true;
    };
    auto tryToWidenViaDuplication = [&]() -> SDValue {
      if (!canWidenViaDuplication(Mask))
        return SDValue();
      // Collect the distinct source bytes from each half of the input.
      SmallVector<int, 4> LoInputs;
      copy_if(Mask, std::back_inserter(LoInputs),
              [](int M) { return M >= 0 && M < 8; });
      array_pod_sort(LoInputs.begin(), LoInputs.end());
      LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
                     LoInputs.end());
      SmallVector<int, 4> HiInputs;
      copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
      array_pod_sort(HiInputs.begin(), HiInputs.end());
      HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
                     HiInputs.end());

      // Target the half that already holds more of the inputs; the other
      // half's inputs get moved over via the i16 pre-shuffle.
      bool TargetLo = LoInputs.size() >= HiInputs.size();
      ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
      ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;

      int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
      SmallDenseMap<int, int, 8> LaneMap;
      // Inputs already in the target half stay where they are.
      for (int I : InPlaceInputs) {
        PreDupI16Shuffle[I/2] = I/2;
        LaneMap[I] = I;
      }
      // Pack moving inputs into any free i16 slots of the target half.
      int j = TargetLo ? 0 : 4, je = j + 4;
      for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
        // Is this word already mapped (two bytes can share a word)?
        if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
          // Advance to the next free slot in the target half.
          while (j < je && PreDupI16Shuffle[j] >= 0)
            ++j;

          if (j == je)
            // No free slot left; the widening plan fails.
            return SDValue();

          // Map this input word into the free slot.
          PreDupI16Shuffle[j] = MovingInputs[i] / 2;
        }

        // Remember where each moved byte ends up so the post-shuffle can
        // find it.
        LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
      }
      V1 = DAG.getBitcast(
          MVT::v16i8,
          DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
                               DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));

      // Unpack the bytes of the target half against themselves to duplicate
      // each byte into a pair; skip a side if no even (or odd) mask byte
      // actually uses it.
      bool EvenInUse = false, OddInUse = false;
      for (int i = 0; i < 16; i += 2) {
        EvenInUse |= (Mask[i + 0] >= 0);
        OddInUse |= (Mask[i + 1] >= 0);
        if (EvenInUse && OddInUse)
          break;
      }
      V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
                       MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8),
                       OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8));

      // Build the final i16 shuffle from the duplicated words.
      int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
      for (int i = 0; i < 16; ++i)
        if (Mask[i] >= 0) {
          int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
          assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
          if (PostDupI16Shuffle[i / 2] < 0)
            PostDupI16Shuffle[i / 2] = MappedMask;
          else
            assert(PostDupI16Shuffle[i / 2] == MappedMask &&
                   "Conflicting entries in the original shuffle!");
        }
      return DAG.getBitcast(
          MVT::v16i8,
          DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
                               DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
    };
    if (SDValue V = tryToWidenViaDuplication())
      return V;
  }

  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
                                             Zeroable, Subtarget, DAG))
    return Masked;

  // Use dedicated unpack instructions where possible.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
    return V;

  // Try to use byte shift instructions to mask.
  if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v16i8, V1, V2, Mask,
                                              Zeroable, Subtarget, DAG))
    return V;

  // Check whether a compaction lowering (drop even elements + PACKUS) can be
  // done; see the NumEvenDrops block further below.
  bool IsSingleInput = V2.isUndef();
  int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput);

  // PSHUFB-based lowering: preferred when available, except for two-input
  // shuffles that can instead be handled by one compaction PACKUS
  // (NumEvenDrops == 1).
  if (Subtarget.hasSSSE3() && (IsSingleInput || NumEvenDrops != 1)) {
    bool V1InUse = false;
    bool V2InUse = false;

    SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs(
        DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);

    // If both inputs ended up being used, a two-PSHUFB + OR sequence was
    // built; see if any cheaper two-input lowering applies first.
    if (V1InUse && V2InUse) {
      if (Subtarget.hasSSE41())
        if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask,
                                                Zeroable, Subtarget, DAG))
          return Blend;

      // Try permuting the inputs into an unpack instruction.
      if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(
              DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
        return Unpack;

      // With VBMI a single cross-input byte permute is available.
      if (Subtarget.hasVBMI())
        return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, Subtarget,
                                     DAG);

      // With XOP, VPPERM selects bytes from both inputs in one instruction.
      if (Subtarget.hasXOP()) {
        SDValue MaskNode = getConstVector(Mask, MVT::v16i8, DAG, DL, true);
        return DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, V1, V2, MaskNode);
      }

      // Try a byte rotate + permute combination.
      if (SDValue V = lowerShuffleAsByteRotateAndPermute(
              DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
        return V;
    }

    return PSHUFB;
  }

  // A lone V2 element can be lowered as an element insertion.
  if (NumV2Elements == 1)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
    return Blend;

  // Compaction lowering: clear the odd bytes of both inputs and PACKUS them
  // together repeatedly, dropping the (known droppable) even elements.
  if (NumEvenDrops) {
    assert(NumEvenDrops <= 3 &&
           "No support for dropping even elements more than 3 times.");
    // Keep only every 2^(NumEvenDrops-1)-th word of each input.
    SmallVector<SDValue, 8> WordClearOps(8, DAG.getConstant(0, DL, MVT::i16));
    for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1))
      WordClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i16);
    SDValue WordClearMask = DAG.getBuildVector(MVT::v8i16, DL, WordClearOps);
    V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V1),
                     WordClearMask);
    if (!IsSingleInput)
      V2 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V2),
                       WordClearMask);

    // Pack things back together, dropping one more level per iteration.
    SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1,
                                 IsSingleInput ? V1 : V2);
    for (int i = 1; i < NumEvenDrops; ++i) {
      Result = DAG.getBitcast(MVT::v8i16, Result);
      Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
    }
    return Result;
  }

  // General two-input fallback: decompose into per-input shuffles plus a
  // merge.
  if (NumV2Elements > 0)
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v16i8, V1, V2, Mask,
                                                Subtarget, DAG);

  // Final single-input fallback: split the byte mask into two word-sized
  // half masks, shuffle each half as v8i16, and PACKUS the results.
  SDValue V = V1;

  std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
  std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
  for (int i = 0; i < 16; ++i)
    if (Mask[i] >= 0)
      (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];

  SDValue VLoHalf, VHiHalf;

  // If only even (low) bytes of each word are referenced, a single AND can
  // zero-extend the words in place; otherwise zero-unpack into two halves.
  if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
      none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
    // Mask off the high byte of every word.
    VLoHalf = DAG.getBitcast(MVT::v8i16, V);
    VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
                          DAG.getConstant(0x00FF, DL, MVT::v8i16));

    // The high half is never referenced in this case.
    VHiHalf = DAG.getUNDEF(MVT::v8i16);

    // Rescale byte indices to word indices.
    for (int &M : LoBlendMask)
      if (M >= 0)
        M /= 2;
    for (int &M : HiBlendMask)
      if (M >= 0)
        M /= 2;
  } else {
    // Zero-extend every byte to a word by interleaving with zero.
    SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);

    VLoHalf = DAG.getBitcast(
        MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
    VHiHalf = DAG.getBitcast(
        MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
  }

  SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
  SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);

  return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
15961 | |
15962 | |
15963 | |
15964 | |
15965 | |
15966 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, |
15967 | MVT VT, SDValue V1, SDValue V2, |
15968 | const APInt &Zeroable, |
15969 | const X86Subtarget &Subtarget, |
15970 | SelectionDAG &DAG) { |
15971 | switch (VT.SimpleTy) { |
15972 | case MVT::v2i64: |
15973 | return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15974 | case MVT::v2f64: |
15975 | return lowerV2F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15976 | case MVT::v4i32: |
15977 | return lowerV4I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15978 | case MVT::v4f32: |
15979 | return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15980 | case MVT::v8i16: |
15981 | return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15982 | case MVT::v8f16: |
15983 | return lowerV8F16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15984 | case MVT::v16i8: |
15985 | return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15986 | |
15987 | default: |
15988 | llvm_unreachable("Unimplemented!"); |
15989 | } |
15990 | } |
15991 | |
15992 | |
15993 | |
15994 | |
15995 | |
15996 | |
/// Generic routine to split a 256-bit-or-wider vector shuffle into two
/// half-width shuffles and CONCAT the results.
///
/// Each half of the output mask is lowered independently: if it draws from
/// only one input, a single half-width shuffle suffices; otherwise each
/// input is first condensed into one half-width value and the two are then
/// blended together.
static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                    SDValue V2, ArrayRef<int> Mask,
                                    SelectionDAG &DAG) {
  assert(VT.getSizeInBits() >= 256 &&
         "Only for 256-bit or wider vector shuffles!");
  assert(V1.getSimpleValueType() == VT && "Bad operand type!");
  assert(V2.getSimpleValueType() == VT && "Bad operand type!");

  ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
  ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);

  int NumElements = VT.getVectorNumElements();
  int SplitNumElements = NumElements / 2;
  MVT ScalarVT = VT.getVectorElementType();
  MVT SplitVT = MVT::getVectorVT(ScalarVT, SplitNumElements);

  // Split an input into its low and high halves, bitcast to SplitVT.
  // peekThroughBitcasts lets the split see through wrapping bitcasts.
  auto SplitVector = [&](SDValue V) {
    SDValue LoV, HiV;
    std::tie(LoV, HiV) = splitVector(peekThroughBitcasts(V), DAG, DL);
    return std::make_pair(DAG.getBitcast(SplitVT, LoV),
                          DAG.getBitcast(SplitVT, HiV));
  };

  SDValue LoV1, HiV1, LoV2, HiV2;
  std::tie(LoV1, HiV1) = SplitVector(V1);
  std::tie(LoV2, HiV2) = SplitVector(V2);

  // Lower one half of the output. Builds three masks: one selecting within
  // V1's halves, one within V2's halves, and a final blend between the two.
  auto HalfBlend = [&](ArrayRef<int> HalfMask) {
    bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
    SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);
    SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);
    SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);
    for (int i = 0; i < SplitNumElements; ++i) {
      int M = HalfMask[i];
      if (M >= NumElements) {
        // Element comes from V2; record which of its halves is touched.
        if (M >= NumElements + SplitNumElements)
          UseHiV2 = true;
        else
          UseLoV2 = true;
        V2BlendMask[i] = M - NumElements;
        BlendMask[i] = SplitNumElements + i;
      } else if (M >= 0) {
        // Element comes from V1.
        if (M >= SplitNumElements)
          UseHiV1 = true;
        else
          UseLoV1 = true;
        V1BlendMask[i] = M;
        BlendMask[i] = i;
      }
    }

    // Shortcut cases: fully undef, or drawing from only one input, need a
    // single half-width shuffle (or none at all).
    if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
      return DAG.getUNDEF(SplitVT);
    if (!UseLoV2 && !UseHiV2)
      return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
    if (!UseLoV1 && !UseHiV1)
      return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);

    // General case: condense each input to one half-width value, rewriting
    // the blend mask indices when only one half of an input is used.
    SDValue V1Blend, V2Blend;
    if (UseLoV1 && UseHiV1) {
      V1Blend =
        DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
    } else {
      // Only one half of V1 is needed; use it directly and rebase indices.
      V1Blend = UseLoV1 ? LoV1 : HiV1;
      for (int i = 0; i < SplitNumElements; ++i)
        if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
          BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
    }
    if (UseLoV2 && UseHiV2) {
      V2Blend =
        DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
    } else {
      // Only one half of V2 is needed; use it directly and rebase indices.
      V2Blend = UseLoV2 ? LoV2 : HiV2;
      for (int i = 0; i < SplitNumElements; ++i)
        if (BlendMask[i] >= SplitNumElements)
          BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
    }
    return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
  };
  SDValue Lo = HalfBlend(LoMask);
  SDValue Hi = HalfBlend(HiMask);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
16090 | |
16091 | |
16092 | |
16093 | |
16094 | |
16095 | |
16096 | |
16097 | |
16098 | |
/// Lower a two-input shuffle either by splitting into half-width shuffles or
/// by decomposing into per-input shuffles plus a merge, whichever heuristic
/// suggests is cheaper.
///
/// Splitting is chosen only when each input contributes from at most one
/// 128-bit lane; a both-inputs-broadcast pattern always takes the
/// decomposed path.
static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
                                          SDValue V2, ArrayRef<int> Mask,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
  assert(!V2.isUndef() && "This routine must not be used to lower single-input "
         "shuffles as it could then recurse on itself.");
  int Size = Mask.size();

  // Returns true when every V1 element referenced by the mask is a single
  // index and likewise for V2 -- i.e. both inputs are pure broadcasts.
  auto DoBothBroadcast = [&] {
    int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
    for (int M : Mask)
      if (M >= Size) {
        if (V2BroadcastIdx < 0)
          V2BroadcastIdx = M - Size;
        else if (M - Size != V2BroadcastIdx)
          return false;
      } else if (M >= 0) {
        if (V1BroadcastIdx < 0)
          V1BroadcastIdx = M;
        else if (M != V1BroadcastIdx)
          return false;
      }
    return true;
  };
  if (DoBothBroadcast())
    return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,
                                                DAG);

  // Count how many distinct 128-bit lanes of each input the mask touches.
  // If each input is used from at most one lane, splitting into half-width
  // shuffles is preferred.
  int LaneCount = VT.getSizeInBits() / 128;
  int LaneSize = Size / LaneCount;
  SmallBitVector LaneInputs[2];
  LaneInputs[0].resize(LaneCount, false);
  LaneInputs[1].resize(LaneCount, false);
  for (int i = 0; i < Size; ++i)
    if (Mask[i] >= 0)
      LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
  if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
    return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);

  // Otherwise fall back to shuffling each input independently and then
  // merging the results.
  return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,
                                              DAG);
}
16149 | |
16150 | |
16151 | |
16152 | static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT, |
16153 | SDValue V1, SDValue V2, |
16154 | ArrayRef<int> Mask, |
16155 | SelectionDAG &DAG) { |
16156 | assert(VT == MVT::v4f64 && "Only for v4f64 shuffles"); |
16157 | |
16158 | int LHSMask[4] = {-1, -1, -1, -1}; |
16159 | int RHSMask[4] = {-1, -1, -1, -1}; |
16160 | unsigned SHUFPMask = 0; |
16161 | |
16162 | |
16163 | |
16164 | for (int i = 0; i != 4; ++i) { |
16165 | int M = Mask[i]; |
16166 | if (M < 0) |
16167 | continue; |
16168 | int LaneBase = i & ~1; |
16169 | auto &LaneMask = (i & 1) ? RHSMask : LHSMask; |
16170 | LaneMask[LaneBase + (M & 1)] = M; |
16171 | SHUFPMask |= (M & 1) << i; |
16172 | } |
16173 | |
16174 | SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask); |
16175 | SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask); |
16176 | return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS, |
16177 | DAG.getTargetConstant(SHUFPMask, DL, MVT::i8)); |
16178 | } |
16179 | |
16180 | |
16181 | |
16182 | |
16183 | |
16184 | |
16185 | |
16186 | |
16187 | |
/// Lower a shuffle crossing 128-bit lanes as a cross-lane permute of whole
/// lanes (or sublanes) followed by an in-lane permute, trying whole-lane
/// granularity first and progressively finer sublanes where allowed.
static SDValue lowerShuffleAsLanePermuteAndPermute(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  int NumElts = VT.getVectorNumElements();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumEltsPerLane = NumElts / NumLanes;
  // Sublane (finer than 128-bit) granularity is only attempted with AVX2 on
  // single-input shuffles (see the gating below).
  bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef();

  // Try to factor Mask as: a cross-lane move of whole sublanes followed by a
  // shuffle that stays within each 128-bit lane. Returns empty on failure.
  auto getSublanePermute = [&](int NumSublanes) -> SDValue {
    int NumSublanesPerLane = NumSublanes / NumLanes;
    int NumEltsPerSublane = NumElts / NumSublanes;

    SmallVector<int, 16> CrossLaneMask;
    SmallVector<int, 16> InLaneMask(NumElts, SM_SentinelUndef);
    // CrossLaneMaskLarge is in sublane-sized units; it is widened to element
    // granularity via narrowShuffleMaskElts below.
    SmallVector<int, 16> CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];
      if (M < 0)
        continue;

      int SrcSublane = M / NumEltsPerSublane;
      int DstLane = i / NumEltsPerLane;

      // Find (or reuse) a sublane slot within the destination lane that
      // already holds — or can hold — the required source sublane.
      bool Found = false;
      int DstSubStart = DstLane * NumSublanesPerLane;
      int DstSubEnd = DstSubStart + NumSublanesPerLane;
      for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) {
        if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane))
          continue;

        Found = true;
        CrossLaneMaskLarge[DstSublane] = SrcSublane;
        int DstSublaneOffset = DstSublane * NumEltsPerSublane;
        InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
        break;
      }
      // No slot in the destination lane can supply this element.
      if (!Found)
        return SDValue();
    }

    // Convert the sublane-granularity cross-lane mask to element granularity.
    narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask);

    if (!CanUseSublanes) {
      // If the factoring would only shuffle the lowest lane while all other
      // lanes are identity, bail and let other lowerings handle it rather
      // than emitting two shuffles here.
      int NumIdentityLanes = 0;
      bool OnlyShuffleLowestLane = true;
      for (int i = 0; i != NumLanes; ++i) {
        int LaneOffset = i * NumEltsPerLane;
        if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane,
                                       i * NumEltsPerLane))
          NumIdentityLanes++;
        else if (CrossLaneMask[LaneOffset] != 0)
          OnlyShuffleLowestLane = false;
      }
      if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
        return SDValue();
    }

    // Emit: cross-lane shuffle first, then the purely in-lane shuffle.
    SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
    return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),
                                InLaneMask);
  };

  // First attempt a solution at whole 128-bit lane granularity.
  if (SDValue V = getSublanePermute(NumLanes))
    return V;

  // Finer granularities are only attempted for AVX2 single-input shuffles.
  if (!CanUseSublanes)
    return SDValue();

  // Then try two sublanes per 128-bit lane.
  if (SDValue V = getSublanePermute(NumLanes * 2))
    return V;

  // Four sublanes per lane implies a variable cross-lane shuffle; only do
  // this when the subtarget executes those quickly.
  if (!Subtarget.hasFastVariableCrossLaneShuffle())
    return SDValue();

  return getSublanePermute(NumLanes * 4);
}
16283 | |
16284 | |
16285 | |
16286 | |
16287 | |
16288 | |
16289 | |
16290 | |
/// Lower a 256-bit shuffle crossing 128-bit lanes by flipping the lanes of
/// the (single) input and combining the original and flipped copies with a
/// purely in-lane shuffle; falls back to splitting when that is cheaper.
static SDValue lowerShuffleAsLanePermuteAndShuffle(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  // Only 256-bit vectors (exactly two 128-bit lanes) are handled here.
  assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
  int Size = Mask.size();
  int LaneSize = Size / 2;

  // For v4f64 with at least one cross-lane element, try the SHUFPD-based
  // lowering first.
  if (VT == MVT::v4f64 &&
      !all_of(Mask, [LaneSize](int M) { return M < LaneSize; }))
    if (SDValue V =
            lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG))
      return V;

  // Decide whether to split into two 128-bit shuffles instead. Pre-AVX2 we
  // split unless *both* lanes are sources of cross-lane elements; with AVX2
  // we split whenever only one source lane is referenced at all.
  if (!Subtarget.hasAVX2()) {
    bool LaneCrossing[2] = {false, false};
    for (int i = 0; i < Size; ++i)
      if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
        LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
    if (!LaneCrossing[0] || !LaneCrossing[1])
      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
  } else {
    bool LaneUsed[2] = {false, false};
    for (int i = 0; i < Size; ++i)
      if (Mask[i] >= 0)
        LaneUsed[(Mask[i] % Size) / LaneSize] = true;
    if (!LaneUsed[0] || !LaneUsed[1])
      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
  }

  // From here on the lowering relies on having a single input vector.
  assert(V2.isUndef() &&
         "This last part of this routine only works on single input shuffles");

  // Build an in-lane mask: cross-lane elements are redirected to the flipped
  // copy (second shuffle operand, hence the +Size offset).
  SmallVector<int, 32> InLaneMask(Mask.begin(), Mask.end());
  for (int i = 0; i < Size; ++i) {
    int &M = InLaneMask[i];
    if (M < 0)
      continue;
    if (((M % Size) / LaneSize) != (i / LaneSize))
      M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
  }
  assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
         "In-lane shuffle mask expected");

  // Flip V1's two 128-bit lanes via a 64-bit-element {2,3,0,1} shuffle, then
  // combine original and flipped copies with the in-lane mask.
  MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
  SDValue Flipped = DAG.getBitcast(PVT, V1);
  Flipped =
      DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1});
  Flipped = DAG.getBitcast(VT, Flipped);
  return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
}
16350 | |
16351 | |
/// Lower 256-bit shuffles viewed as two 128-bit halves, using subvector
/// broadcast loads, zero-insert, blend, subvector insert, SHUF128 or
/// VPERM2X128 depending on the mask and subtarget.
static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const APInt &Zeroable,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  if (V2.isUndef()) {
    // Splatting the lo or hi half of a one-use, non-non-temporal load can be
    // emitted directly as a 128-bit subvector broadcast load.
    bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
    bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
    if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
        MayFoldLoad(peekThroughOneUseBitcasts(V1))) {
      auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
      if (!Ld->isNonTemporal()) {
        MVT MemVT = VT.getHalfNumVectorElementsVT();
        // Byte offset into the load: 0 for the low half, the half-vector
        // store size for the high half.
        unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
                                               TypeSize::Fixed(Ofs), DL);
        SDValue Ops[] = {Ld->getChain(), Ptr};
        SDValue BcastLd = DAG.getMemIntrinsicNode(
            X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT,
            DAG.getMachineFunction().getMachineMemOperand(
                Ld->getMemOperand(), Ofs, MemVT.getStoreSize()));
        // Re-route chain users of the original load to the broadcast load.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
        return BcastLd;
      }
    }

    // With AVX2, defer remaining single-input cases to other lowerings.
    if (Subtarget.hasAVX2())
      return SDValue();
  }

  bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode());

  // This lowering needs the mask to be expressible at 128-bit granularity.
  SmallVector<int, 4> WidenedMask;
  if (!canWidenShuffleElements(Mask, Zeroable, V2IsZero, WidenedMask))
    return SDValue();

  bool IsLowZero = (Zeroable & 0x3) == 0x3;
  bool IsHighZero = (Zeroable & 0xc) == 0xc;

  // Low half is V1's low half and high half is zero: insert into a zero
  // vector.
  if (WidenedMask[0] == 0 && IsHighZero) {
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
                              DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                       getZeroVector(VT, Subtarget, DAG, DL), LoV,
                       DAG.getIntPtrConstant(0, DL));
  }

  // Try a blend before the insert/permute forms below.
  if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
                                          Subtarget, DAG))
    return Blend;

  // If neither half is zeroable, try subvector-insert and SHUF128 forms.
  if (!IsLowZero && !IsHighZero) {
    // {0,1,0,1} takes both halves from V1's low half; {0,1,4,5} takes V1's
    // low half plus V2's low half — both are an INSERT_SUBVECTOR at index 2.
    bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2);
    if (OnlyUsesV1 || isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2)) {
      // Skip this form when V1 is a load; the VPERM2X128 path below is used
      // instead in that case.
      if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) {
        MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
        SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
                                     OnlyUsesV1 ? V1 : V2,
                                     DAG.getIntPtrConstant(0, DL));
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,
                           DAG.getIntPtrConstant(2, DL));
      }
    }

    // With VLX, use SHUF128 when the low half comes from V1 and the high
    // half from V2.
    if (Subtarget.hasVLX()) {
      if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
        unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
                            ((WidenedMask[1] % 2) << 1);
        return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
                           DAG.getTargetConstant(PermMask, DL, MVT::i8));
      }
    }
  }

  // Otherwise form a VPERM2X128. Each nibble of the immediate selects a
  // source half; bit 3 of a nibble (0x08/0x80) zeroes that result half.
  assert((WidenedMask[0] >= 0 || IsLowZero) &&
         (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");

  unsigned PermMask = 0;
  PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
  PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);

  // Replace operands the immediate never selects with undef so they can be
  // dropped.
  if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00)
    V1 = DAG.getUNDEF(VT);
  if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20)
    V2 = DAG.getUNDEF(VT);

  return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
                     DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
16473 | |
16474 | |
16475 | |
16476 | |
16477 | |
16478 | |
16479 | |
16480 | |
/// Lower a shuffle whose 128-bit lanes each draw from at most two whole
/// source lanes: permute full lanes into place with two lane-granularity
/// shuffles, then apply a single per-lane mask repeated across all lanes.
static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(!V2.isUndef() && "This is only useful with multiple inputs.");

  // A mask that already repeats per-lane doesn't need this transform.
  if (is128BitLaneRepeatedShuffleMask(VT, Mask))
    return SDValue();

  int NumElts = Mask.size();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumLaneElts = 128 / VT.getScalarSizeInBits();
  // RepeatMask: the in-lane mask all lanes must agree on.
  // LaneSrcs[Lane][0/1]: which source lane feeds operand 0/1 for that lane.
  SmallVector<int, 16> RepeatMask(NumLaneElts, -1);
  SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}});

  // First pass: lanes that use two source lanes pin down both the repeated
  // mask and their source assignments.
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    int Srcs[2] = {-1, -1};
    SmallVector<int, 16> InLaneMask(NumLaneElts, -1);
    for (int i = 0; i != NumLaneElts; ++i) {
      int M = Mask[(Lane * NumLaneElts) + i];
      if (M < 0)
        continue;

      // Assign this element's source lane to slot 0 or 1; a destination
      // lane drawing from more than two source lanes cannot be handled.
      int LaneSrc = M / NumLaneElts;
      int Src;
      if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
        Src = 0;
      else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
        Src = 1;
      else
        return SDValue();

      Srcs[Src] = LaneSrc;
      InLaneMask[i] = (M % NumLaneElts) + Src * NumElts;
    }

    // Lanes with at most one source are resolved in the second pass.
    if (Srcs[1] < 0)
      continue;

    LaneSrcs[Lane][0] = Srcs[0];
    LaneSrcs[Lane][1] = Srcs[1];

    // Two masks are compatible when they agree on every mutually-defined
    // element.
    auto MatchMasks = [](ArrayRef<int> M1, ArrayRef<int> M2) {
      assert(M1.size() == M2.size() && "Unexpected mask size");
      for (int i = 0, e = M1.size(); i != e; ++i)
        if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
          return false;
      return true;
    };

    // Copy the defined elements of Mask into MergedMask.
    auto MergeMasks = [](ArrayRef<int> Mask, MutableArrayRef<int> MergedMask) {
      assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
      for (int i = 0, e = MergedMask.size(); i != e; ++i) {
        int M = Mask[i];
        if (M < 0)
          continue;
        assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
               "Unexpected mask element");
        MergedMask[i] = M;
      }
    };

    if (MatchMasks(InLaneMask, RepeatMask)) {
      // This lane agrees with the repeated mask as-is; merge and continue.
      MergeMasks(InLaneMask, RepeatMask);
      continue;
    }

    // Retry with this lane's two sources commuted.
    std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
    ShuffleVectorSDNode::commuteMask(InLaneMask);

    if (MatchMasks(InLaneMask, RepeatMask)) {
      MergeMasks(InLaneMask, RepeatMask);
      continue;
    }

    // This lane cannot be made to agree with the repeated mask.
    return SDValue();
  }

  // Second pass: fit the remaining (single-source) lanes to the repeated
  // mask established above.
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    // Skip lanes fully assigned by the first pass.
    if (LaneSrcs[Lane][0] >= 0)
      continue;

    for (int i = 0; i != NumLaneElts; ++i) {
      int M = Mask[(Lane * NumLaneElts) + i];
      if (M < 0)
        continue;

      // Claim any still-undef repeated-mask element for this lane.
      if (RepeatMask[i] < 0)
        RepeatMask[i] = M % NumLaneElts;

      if (RepeatMask[i] < NumElts) {
        // Repeated mask reads operand 0 here; this lane must match.
        if (RepeatMask[i] != M % NumLaneElts)
          return SDValue();
        LaneSrcs[Lane][0] = M / NumLaneElts;
      } else {
        // Repeated mask reads operand 1 here; this lane must match.
        if (RepeatMask[i] != ((M % NumLaneElts) + NumElts))
          return SDValue();
        LaneSrcs[Lane][1] = M / NumLaneElts;
      }
    }

    if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
      return SDValue();
  }

  // Build the first lane-permuted operand from each lane's source 0.
  SmallVector<int, 16> NewMask(NumElts, -1);
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    int Src = LaneSrcs[Lane][0];
    for (int i = 0; i != NumLaneElts; ++i) {
      int M = -1;
      if (Src >= 0)
        M = Src * NumLaneElts + i;
      NewMask[Lane * NumLaneElts + i] = M;
    }
  }
  SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);

  // Bail if the shuffle folded back to the original mask — no progress was
  // made and lowering it again would not terminate.
  if (isa<ShuffleVectorSDNode>(NewV1) &&
      cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
    return SDValue();

  // Build the second lane-permuted operand from each lane's source 1.
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    int Src = LaneSrcs[Lane][1];
    for (int i = 0; i != NumLaneElts; ++i) {
      int M = -1;
      if (Src >= 0)
        M = Src * NumLaneElts + i;
      NewMask[Lane * NumLaneElts + i] = M;
    }
  }
  SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);

  // Same no-progress guard for the second operand.
  if (isa<ShuffleVectorSDNode>(NewV2) &&
      cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
    return SDValue();

  // Finally apply the repeated mask across every lane of the two permuted
  // operands, offsetting each element into its destination lane.
  for (int i = 0; i != NumElts; ++i) {
    NewMask[i] = RepeatMask[i % NumLaneElts];
    if (NewMask[i] < 0)
      continue;

    NewMask[i] += (i / NumLaneElts) * NumLaneElts;
  }
  return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
16642 | |
16643 | |
16644 | |
16645 | |
16646 | |
16647 | |
16648 | |
16649 | static bool |
16650 | getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask, |
16651 | int &HalfIdx1, int &HalfIdx2) { |
16652 | assert((Mask.size() == HalfMask.size() * 2) && |
16653 | "Expected input mask to be twice as long as output"); |
16654 | |
16655 | |
16656 | bool UndefLower = isUndefLowerHalf(Mask); |
16657 | bool UndefUpper = isUndefUpperHalf(Mask); |
16658 | if (UndefLower == UndefUpper) |
16659 | return false; |
16660 | |
16661 | unsigned HalfNumElts = HalfMask.size(); |
16662 | unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0; |
16663 | HalfIdx1 = -1; |
16664 | HalfIdx2 = -1; |
16665 | for (unsigned i = 0; i != HalfNumElts; ++i) { |
16666 | int M = Mask[i + MaskIndexOffset]; |
16667 | if (M < 0) { |
16668 | HalfMask[i] = M; |
16669 | continue; |
16670 | } |
16671 | |
16672 | |
16673 | |
16674 | int HalfIdx = M / HalfNumElts; |
16675 | |
16676 | |
16677 | int HalfElt = M % HalfNumElts; |
16678 | |
16679 | |
16680 | |
16681 | if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) { |
16682 | HalfMask[i] = HalfElt; |
16683 | HalfIdx1 = HalfIdx; |
16684 | continue; |
16685 | } |
16686 | if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) { |
16687 | HalfMask[i] = HalfElt + HalfNumElts; |
16688 | HalfIdx2 = HalfIdx; |
16689 | continue; |
16690 | } |
16691 | |
16692 | |
16693 | return false; |
16694 | } |
16695 | |
16696 | return true; |
16697 | } |
16698 | |
16699 | |
16700 | |
16701 | static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, |
16702 | ArrayRef<int> HalfMask, int HalfIdx1, |
16703 | int HalfIdx2, bool UndefLower, |
16704 | SelectionDAG &DAG, bool UseConcat = false) { |
16705 | assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?"); |
16706 | assert(V1.getValueType().isSimple() && "Expecting only simple types"); |
16707 | |
16708 | MVT VT = V1.getSimpleValueType(); |
16709 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
16710 | unsigned HalfNumElts = HalfVT.getVectorNumElements(); |
16711 | |
16712 | auto getHalfVector = [&](int HalfIdx) { |
16713 | if (HalfIdx < 0) |
16714 | return DAG.getUNDEF(HalfVT); |
16715 | SDValue V = (HalfIdx < 2 ? V1 : V2); |
16716 | HalfIdx = (HalfIdx % 2) * HalfNumElts; |
16717 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V, |
16718 | DAG.getIntPtrConstant(HalfIdx, DL)); |
16719 | }; |
16720 | |
16721 | |
16722 | SDValue Half1 = getHalfVector(HalfIdx1); |
16723 | SDValue Half2 = getHalfVector(HalfIdx2); |
16724 | SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); |
16725 | if (UseConcat) { |
16726 | SDValue Op0 = V; |
16727 | SDValue Op1 = DAG.getUNDEF(HalfVT); |
16728 | if (UndefLower) |
16729 | std::swap(Op0, Op1); |
16730 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1); |
16731 | } |
16732 | |
16733 | unsigned Offset = UndefLower ? HalfNumElts : 0; |
16734 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, |
16735 | DAG.getIntPtrConstant(Offset, DL)); |
16736 | } |
16737 | |
16738 | |
16739 | |
16740 | |
/// Lower shuffles where an entire half of the result is undef by shuffling
/// 128-bit half-vectors (extract + half-width shuffle + insert), when that
/// is preferable to the wide-vector alternatives on this subtarget.
static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
                                         SDValue V2, ArrayRef<int> Mask,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
  assert((VT.is256BitVector() || VT.is512BitVector()) &&
         "Expected 256-bit or 512-bit vector");

  // Exactly one half of the mask must be undef.
  bool UndefLower = isUndefLowerHalf(Mask);
  if (!UndefLower && !isUndefUpperHalf(Mask))
    return SDValue();

  assert((!UndefLower || !isUndefUpperHalf(Mask)) &&
         "Completely undef shuffle mask should have been simplified already");

  // Upper half undef and lower half equal to V1's upper half: one
  // extract/insert pair suffices.
  MVT HalfVT = VT.getHalfNumVectorElementsVT();
  unsigned HalfNumElts = HalfVT.getVectorNumElements();
  if (!UndefLower &&
      isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
                             DAG.getIntPtrConstant(HalfNumElts, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
                       DAG.getIntPtrConstant(0, DL));
  }

  // Symmetric case: lower half undef, upper half equal to V1's lower half.
  if (UndefLower &&
      isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
                             DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
                       DAG.getIntPtrConstant(HalfNumElts, DL));
  }

  // Express the defined half as a shuffle of at most two source halves.
  int HalfIdx1, HalfIdx2;
  SmallVector<int, 8> HalfMask(HalfNumElts);
  if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2))
    return SDValue();

  assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");

  // Count lower vs upper source halves (indices 0/2 are the lower halves of
  // V1/V2; 1/3 are the upper halves).
  unsigned NumLowerHalves =
      (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
  unsigned NumUpperHalves =
      (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
  assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed");

  // Profitability checks below: on some subtargets a wide shuffle beats the
  // extract/shuffle/insert sequence, in which case we defer (return empty).
  unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
  if (!UndefLower) {
    // The result lands in the lower half; upper half of the result is undef.
    if (NumUpperHalves == 0)
      return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
                                   UndefLower, DAG);

    if (NumUpperHalves == 1) {
      // AVX2 offers wide-shuffle alternatives; defer in the cases below.
      if (Subtarget.hasAVX2()) {
        // 32-bit elements: defer unless the half mask is an unpack, or a
        // single SHUFPS on targets without fast variable cross-lane
        // shuffles.
        if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
            !is128BitUnpackShuffleMask(HalfMask) &&
            (!isSingleSHUFPSMask(HalfMask) ||
             Subtarget.hasFastVariableCrossLaneShuffle()))
          return SDValue();
        // 64-bit single-input shuffles: defer to other lowerings.
        if (EltWidth == 64 && V2.isUndef())
          return SDValue();
      }

      // 512-bit AVX512 shuffles: defer to other lowerings.
      if (Subtarget.hasAVX512() && VT.is512BitVector())
        return SDValue();

      return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
                                   UndefLower, DAG);
    }

    // Both sources are upper halves: not handled here.
    assert(NumUpperHalves == 2 && "Half vector count went wrong");
    return SDValue();
  }

  // UndefLower: the result lands in the upper half.
  if (NumUpperHalves == 0) {
    // 64-bit elements on AVX2: defer to other lowerings.
    if (Subtarget.hasAVX2() && EltWidth == 64)
      return SDValue();
    // 512-bit AVX512 shuffles: defer to other lowerings.
    if (Subtarget.hasAVX512() && VT.is512BitVector())
      return SDValue();

    return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
                                 UndefLower, DAG);
  }

  // Any upper-half source with an undef lower result half: not handled here.
  return SDValue();
}
16847 | |
16848 | |
16849 | |
16850 | |
16851 | |
16852 | |
16853 | static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) { |
16854 | assert((Input == 0 || Input == 1) && "Only two inputs to shuffles."); |
16855 | int Size = Mask.size(); |
16856 | for (int i = 0; i < Size; ++i) |
16857 | if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i) |
16858 | return false; |
16859 | |
16860 | return true; |
16861 | } |
16862 | |
16863 | |
16864 | |
16865 | |
16866 | |
16867 | static SDValue lowerShuffleAsRepeatedMaskAndLanePermute( |
16868 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
16869 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
16870 | int NumElts = VT.getVectorNumElements(); |
16871 | int NumLanes = VT.getSizeInBits() / 128; |
16872 | int NumLaneElts = NumElts / NumLanes; |
16873 | |
16874 | |
16875 | |
16876 | if (Subtarget.hasAVX2()) { |
| 6 | | Calling 'X86Subtarget::hasAVX2' | |
|
| 8 | | Returning from 'X86Subtarget::hasAVX2' | |
|
| |
16877 | for (unsigned BroadcastSize : {16, 32, 64}) { |
16878 | if (BroadcastSize <= VT.getScalarSizeInBits()) |
16879 | continue; |
16880 | int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits(); |
16881 | |
16882 | |
16883 | |
16884 | |
16885 | auto FindRepeatingBroadcastMask = [&](SmallVectorImpl<int> &RepeatMask) { |
16886 | for (int i = 0; i != NumElts; i += NumBroadcastElts) |
16887 | for (int j = 0; j != NumBroadcastElts; ++j) { |
16888 | int M = Mask[i + j]; |
16889 | if (M < 0) |
16890 | continue; |
16891 | int &R = RepeatMask[j]; |
16892 | if (0 != ((M % NumElts) / NumLaneElts)) |
16893 | return false; |
16894 | if (0 <= R && R != M) |
16895 | return false; |
16896 | R = M; |
16897 | } |
16898 | return true; |
16899 | }; |
16900 | |
16901 | SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1); |
16902 | if (!FindRepeatingBroadcastMask(RepeatMask)) |
16903 | continue; |
16904 | |
16905 | |
16906 | SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask); |
16907 | |
16908 | |
16909 | SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1); |
16910 | for (int i = 0; i != NumElts; i += NumBroadcastElts) |
16911 | for (int j = 0; j != NumBroadcastElts; ++j) |
16912 | BroadcastMask[i + j] = j; |
16913 | return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT), |
16914 | BroadcastMask); |
16915 | } |
16916 | } |
16917 | |
16918 | |
16919 | if (!is128BitLaneCrossingShuffleMask(VT, Mask)) |
| 10 | | Assuming the condition is false | |
|
| |
16920 | return SDValue(); |
16921 | |
16922 | |
16923 | SmallVector<int, 8> RepeatedShuffleMask; |
16924 | if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask)) |
| 12 | | Assuming the condition is false | |
|
16925 | return SDValue(); |
16926 | |
16927 | |
16928 | |
16929 | int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1; |
16930 | int NumSubLanes = NumLanes * SubLaneScale; |
16931 | int NumSubLaneElts = NumLaneElts / SubLaneScale; |
| 13 | | 'NumSubLaneElts' initialized here | |
|
16932 | |
16933 | |
16934 | |
16935 | |
16936 | int TopSrcSubLane = -1; |
16937 | SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1); |
16938 | SmallVector<int, 8> RepeatedSubLaneMasks[2] = { |
16939 | SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef), |
16940 | SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef)}; |
16941 | |
16942 | for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) { |
| 14 | | Loop condition is true. Entering loop body | |
|
| 19 | | Assuming 'DstSubLane' is equal to 'NumSubLanes' | |
|
| 20 | | Loop condition is false. Execution continues on line 17001 | |
|
16943 | |
16944 | |
16945 | int SrcLane = -1; |
16946 | SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1); |
16947 | for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { |
| 15 | | Assuming 'Elt' is equal to 'NumSubLaneElts' | |
|
| 16 | | Loop condition is false. Execution continues on line 16960 | |
|
16948 | int M = Mask[(DstSubLane * NumSubLaneElts) + Elt]; |
16949 | if (M < 0) |
16950 | continue; |
16951 | int Lane = (M % NumElts) / NumLaneElts; |
16952 | if ((0 <= SrcLane) && (SrcLane != Lane)) |
16953 | return SDValue(); |
16954 | SrcLane = Lane; |
16955 | int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts); |
16956 | SubLaneMask[Elt] = LocalM; |
16957 | } |
16958 | |
16959 | |
16960 | if (SrcLane < 0) |
| |
16961 | continue; |
| 18 | | Execution continues on line 16942 | |
|
16962 | |
16963 | |
16964 | for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) { |
16965 | auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) { |
16966 | for (int i = 0; i != NumSubLaneElts; ++i) { |
16967 | if (M1[i] < 0 || M2[i] < 0) |
16968 | continue; |
16969 | if (M1[i] != M2[i]) |
16970 | return false; |
16971 | } |
16972 | return true; |
16973 | }; |
16974 | |
16975 | auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane]; |
16976 | if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask)) |
16977 | continue; |
16978 | |
16979 | |
16980 | for (int i = 0; i != NumSubLaneElts; ++i) { |
16981 | int M = SubLaneMask[i]; |
16982 | if (M < 0) |
16983 | continue; |
16984 | assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) && |
16985 | "Unexpected mask element"); |
16986 | RepeatedSubLaneMask[i] = M; |
16987 | } |
16988 | |
16989 | |
16990 | |
16991 | int SrcSubLane = (SrcLane * SubLaneScale) + SubLane; |
16992 | TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane); |
16993 | Dst2SrcSubLanes[DstSubLane] = SrcSubLane; |
16994 | break; |
16995 | } |
16996 | |
16997 | |
16998 | if (Dst2SrcSubLanes[DstSubLane] < 0) |
16999 | return SDValue(); |
17000 | } |
17001 | assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes && |
17002 | "Unexpected source lane"); |
17003 | |
17004 | |
17005 | SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1); |
17006 | for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) { |
| 21 | | Loop condition is false. Execution continues on line 17017 | |
|
17007 | int Lane = SubLane / SubLaneScale; |
17008 | auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale]; |
17009 | for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { |
17010 | int M = RepeatedSubLaneMask[Elt]; |
17011 | if (M < 0) |
17012 | continue; |
17013 | int Idx = (SubLane * NumSubLaneElts) + Elt; |
17014 | RepeatedMask[Idx] = M + (Lane * NumLaneElts); |
17015 | } |
17016 | } |
17017 | SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask); |
17018 | |
17019 | |
17020 | SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1); |
17021 | for (int i = 0; i != NumElts; i += NumSubLaneElts) { |
| 22 | | Assuming 'i' is not equal to 'NumElts' | |
|
| 23 | | Loop condition is true. Entering loop body | |
|
17022 | int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts]; |
| |
17023 | if (SrcSubLane < 0) |
17024 | continue; |
17025 | for (int j = 0; j != NumSubLaneElts; ++j) |
17026 | SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts); |
17027 | } |
17028 | |
17029 | return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT), |
17030 | SubLaneMask); |
17031 | } |
17032 | |
// Match a 64-bit-element shuffle mask against the SHUFPD pattern and compute
// its immediate. On success ShuffleImm holds the immediate,
// ForceV1Zero/ForceV2Zero report operands that must be zeroed, and V1/V2 may
// have been commuted to make the mask fit.
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
                                   bool &ForceV1Zero, bool &ForceV2Zero,
                                   unsigned &ShuffleImm, ArrayRef<int> Mask,
                                   const APInt &Zeroable) {
  int NumElts = VT.getVectorNumElements();
  assert(VT.getScalarSizeInBits() == 64 &&
         (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
         "Unexpected data type for VSHUFPD");
  assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) &&
         "Illegal shuffle mask");

  // ZeroLane[0]/[1]: all even/odd result elements are zeroable, so the
  // corresponding operand can be replaced with a zero vector.
  bool ZeroLane[2] = { true, true };
  for (int i = 0; i < NumElts; ++i)
    ZeroLane[i & 1] &= Zeroable[i];

  // SHUFPD takes even result elements from V1 and odd ones from V2, each
  // choosing one element of its 128-bit pair via one immediate bit.
  ShuffleImm = 0;
  bool ShufpdMask = true;
  bool CommutableMask = true;
  for (int i = 0; i < NumElts; ++i) {
    if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1])
      continue;
    // A remaining negative entry (zero sentinel in a non-zeroable position)
    // cannot be matched.
    if (Mask[i] < 0)
      return false;
    // Val: the 2-element range SHUFPD reads here; CommutVal: the same with
    // V1/V2 swapped.
    int Val = (i & 6) + NumElts * (i & 1);
    int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
    if (Mask[i] < Val || Mask[i] > Val + 1)
      ShufpdMask = false;
    if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
      CommutableMask = false;
    // Low/high selection bit for this position.
    ShuffleImm |= (Mask[i] % 2) << i;
  }

  if (!ShufpdMask && !CommutableMask)
    return false;

  // Only the commuted form matched: swap the operands.
  if (!ShufpdMask && CommutableMask)
    std::swap(V1, V2);

  ForceV1Zero = ZeroLane[0];
  ForceV2Zero = ZeroLane[1];
  return true;
}
17077 | |
17078 | static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, |
17079 | SDValue V2, ArrayRef<int> Mask, |
17080 | const APInt &Zeroable, |
17081 | const X86Subtarget &Subtarget, |
17082 | SelectionDAG &DAG) { |
17083 | assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && |
17084 | "Unexpected data type for VSHUFPD"); |
17085 | |
17086 | unsigned Immediate = 0; |
17087 | bool ForceV1Zero = false, ForceV2Zero = false; |
17088 | if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate, |
17089 | Mask, Zeroable)) |
17090 | return SDValue(); |
17091 | |
17092 | |
17093 | if (ForceV1Zero) |
17094 | V1 = getZeroVector(VT, Subtarget, DAG, DL); |
17095 | if (ForceV2Zero) |
17096 | V2 = getZeroVector(VT, Subtarget, DAG, DL); |
17097 | |
17098 | return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, |
17099 | DAG.getTargetConstant(Immediate, DL, MVT::i8)); |
17100 | } |
17101 | |
17102 | |
17103 | |
17104 | |
17105 | static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT, |
17106 | SDValue V1, SDValue V2, |
17107 | ArrayRef<int> Mask, |
17108 | const APInt &Zeroable, |
17109 | SelectionDAG &DAG) { |
17110 | assert(VT == MVT::v32i8 && "Unexpected type!"); |
17111 | |
17112 | |
17113 | if (!isSequentialOrUndefInRange(Mask, 0, 8, 0, 8)) |
17114 | return SDValue(); |
17115 | |
17116 | |
17117 | if (Zeroable.countLeadingOnes() < (Mask.size() - 8)) |
17118 | return SDValue(); |
17119 | |
17120 | V1 = DAG.getBitcast(MVT::v4i64, V1); |
17121 | V2 = DAG.getBitcast(MVT::v4i64, V2); |
17122 | |
17123 | V1 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V1); |
17124 | V2 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V2); |
17125 | |
17126 | |
17127 | |
17128 | SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, |
17129 | { 0, 1, 2, 3, 16, 17, 18, 19, |
17130 | 4, 5, 6, 7, 20, 21, 22, 23 }); |
17131 | |
17132 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8, |
17133 | DAG.getConstant(0, DL, MVT::v32i8), Unpack, |
17134 | DAG.getIntPtrConstant(0, DL)); |
17135 | } |
17136 | |
17137 | |
17138 | |
17139 | |
17140 | |
17141 | |
/// Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// The lowering strategies below are tried strictly in order; each returns
/// as soon as it produces a match, so later strategies are fallbacks.
static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Try a whole-128-bit-lane (2x128) shuffle first.
  if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable,
                                     Subtarget, DAG))
    return V;

  if (V2.isUndef()) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Use a low-duplicate instruction for a mask that matches its pattern.
    if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
      return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);

    if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
      // Non-lane-crossing single-input shuffles can be lowered with an
      // in-lane permutation (VPERMILPD immediate form).
      unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
                              ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
                         DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
    }

    // With AVX2 we have direct support for this permutation (VPERMPD).
    if (Subtarget.hasAVX2())
      return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
                         getV4X86ShuffleImm8ForMask(Mask, DL, DAG));

    // Try to create an in-lane repeating shuffle mask and then shuffle the
    // results into the target lanes.
    if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
            DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
      return V;

    // Try a lane permute followed by a per-lane permute.
    if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2,
                                                        Mask, DAG, Subtarget))
      return V;

    // Otherwise, fall back on a lane permute plus shuffle.
    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask,
                                               DAG, Subtarget);
  }

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check if the mask happens to exactly fit that of SHUFPD.
  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Op;

  // For a lane-crossing two-input shuffle that doesn't source everything
  // from the low halves of both inputs, try a lane permute + SHUFPD.
  // The BUILD_VECTOR operand checks sidestep sources that other combines
  // are expected to handle differently.
  if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask) &&
      !all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
      (V1.getOpcode() != ISD::BUILD_VECTOR) &&
      (V2.getOpcode() != ISD::BUILD_VECTOR))
    if (SDValue Op = lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2,
                                                       Mask, DAG))
      return Op;

  // If we have one input in place, then we can permute the other input and
  // blend the result.
  if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
                                                Subtarget, DAG);

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle - but skip this when AVX2 is available and either input is
  // already in place, since the decomposed-merge path below handles that
  // case in fewer instructions.
  if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
                                isShuffleMaskInputInPlace(1, Mask))))
    if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask(
            DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
      return V;

  // If we have VLX support, we can use VEXPAND.
  if (Subtarget.hasVLX())
    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;

  // With AVX2 we can fully permute each input across lanes, so decomposing
  // into per-input permutes plus a blend always works.
  if (Subtarget.hasAVX2())
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
                                                Subtarget, DAG);

  // Otherwise fall back on generic lowering.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
                                    Subtarget, DAG);
}
17257 | |
17258 | |
17259 | |
17260 | |
17261 | |
/// Handle lowering of 4-lane 64-bit integer shuffles (requires AVX2).
///
/// Strategies are tried strictly in order; each returns on first match.
static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");

  // Try a whole-128-bit-lane (2x128) shuffle first.
  if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable,
                                     Subtarget, DAG))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  if (V2.isUndef()) {
    // When the shuffle is mirrored in both 128-bit lanes we can lower it as
    // a PSHUFD of the v8i32 view, avoiding a cross-lane permute.
    SmallVector<int, 2> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
      SmallVector<int, 4> PSHUFDMask;
      narrowShuffleMaskElts(2, RepeatedMask, PSHUFDMask);
      return DAG.getBitcast(
          MVT::v4i64,
          DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
                      DAG.getBitcast(MVT::v8i32, V1),
                      getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
    }

    // AVX2 provides a direct instruction for this single-input cross-lane
    // permutation (VPERMQ).
    return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // If we have VLX support, we can use VALIGN or VEXPAND.
  if (Subtarget.hasVLX()) {
    if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i64, V1, V2, Mask,
                                              Subtarget, DAG))
      return Rotate;

    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;
  }

  // Try to use a byte rotation (PALIGNR).
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
    return V;

  // If we have one input in place, then we can permute the other input and
  // blend the result.
  if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
                                                Subtarget, DAG);

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle - only when neither input is already in place, since the
  // in-place cases were handled above.
  if (!isShuffleMaskInputInPlace(0, Mask) &&
      !isShuffleMaskInputInPlace(1, Mask))
    if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
            DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
      return Result;

  // Otherwise fall back on generic blend lowering.
  return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
                                              Subtarget, DAG);
}
17355 | |
17356 | |
17357 | |
17358 | |
17359 | |
/// Handle lowering of 8-lane 32-bit floating point shuffles.
///
/// Strategies are tried strictly in order; each returns on first match.
static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  // If the shuffle mask is repeated in each 128-bit lane, we have many more
  // options that mirror the 128-bit lowerings.
  SmallVector<int, 4> RepeatedMask;
  if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
    assert(RepeatedMask.size() == 4 &&
           "Repeated masks must be half the mask width!");

    // Use even/odd duplicate instructions for masks that match their pattern.
    if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
      return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
    if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
      return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);

    if (V2.isUndef())
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
      return V;

    // Otherwise, fall back to a SHUFPS sequence using the repeated mask,
    // mirroring the 128-bit lowering.
    return lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
  }

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
    return V;

  // For single-input shuffles with differing patterns in the two lanes, use
  // a variable-mask permute.
  if (V2.isUndef()) {
    // Non-lane-crossing masks can use the in-lane variable permute.
    if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) {
      SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
      return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask);
    }
    // With AVX2 we can use the full cross-lane variable permute (VPERMPS).
    if (Subtarget.hasAVX2()) {
      SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
      return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
    }

    // Otherwise, fall back on a lane permute plus shuffle.
    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask,
                                               DAG, Subtarget);
  }

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // If we have VLX support, we can use VEXPAND.
  if (Subtarget.hasVLX())
    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;

  // Without AVX512, an unpckwd-style interleave of the two inputs is better
  // handled by splitting into 128-bit halves.
  if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
    return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget,
                                      DAG);

  // With AVX2 we can fully permute each input across lanes, so decomposing
  // into per-input permutes plus a blend always works.
  if (Subtarget.hasAVX2())
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8f32, V1, V2, Mask,
                                                Subtarget, DAG);

  // Otherwise fall back on generic lowering.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
                                    Subtarget, DAG);
}
17454 | |
17455 | |
17456 | |
17457 | |
17458 | |
/// Handle lowering of 8-lane 32-bit integer shuffles (requires AVX2).
///
/// Strategies are tried strictly in order; each returns on first match.
static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");

  // Whenever we can lower this as a zext, that instruction is strictly
  // faster than any alternative and also allows folding memory operands.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Without AVX512, a two-input unpckwd-style interleave is better handled
  // by splitting into 128-bit halves.
  if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
      !Subtarget.hasAVX512())
    return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget,
                                      DAG);

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  // If the shuffle mask is repeated in each 128-bit lane we can use cheaper
  // in-lane instructions that mirror the 128-bit lowerings.
  SmallVector<int, 4> RepeatedMask;
  bool Is128BitLaneRepeatedShuffle =
      is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask);
  if (Is128BitLaneRepeatedShuffle) {
    assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
    if (V2.isUndef())
      return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
      return V;
  }

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // If we have VLX support, we can use VALIGN or VEXPAND.
  if (Subtarget.hasVLX()) {
    if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask,
                                              Subtarget, DAG))
      return Rotate;

    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;
  }

  // Try to use a byte rotation (PALIGNR).
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
    return V;

  if (V2.isUndef()) {
    // Try a 256-bit unpack of the single input before falling back to the
    // fully general variable permute.
    if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v8i32, Mask, V1, V2, DAG))
      return V;

    // Otherwise use the full cross-lane variable permute (VPERMD), available
    // since we know we have AVX2.
    SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
    return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1);
  }

  // If the lane-repeated mask is a single-SHUFPS pattern, bitcast to float
  // and use SHUFPS - cheaper than an integer lane permute sequence.
  if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
    SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
    SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
    SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
                                            CastV1, CastV2, DAG);
    return DAG.getBitcast(MVT::v8i32, ShufPS);
  }

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // Otherwise fall back on generic blend lowering.
  return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i32, V1, V2, Mask,
                                              Subtarget, DAG);
}
17569 | |
17570 | |
17571 | |
17572 | |
17573 | |
/// Handle lowering of 16-lane 16-bit integer shuffles (requires AVX2).
///
/// Strategies are tried strictly in order; each returns on first match.
static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");

  // Whenever we can lower this as a zext, that instruction is strictly
  // faster than any alternative and also allows folding memory operands.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
          DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return ZExt;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use a truncation-based lowering.
  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i16, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use a byte rotation (PALIGNR).
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
    return V;

  if (V2.isUndef()) {
    // Try to use bit rotation instructions.
    if (SDValue Rotate =
            lowerShuffleAsBitRotate(DL, MVT::v16i16, V1, Mask, Subtarget, DAG))
      return Rotate;

    // Try a 256-bit unpack of the single input.
    if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, Mask, V1, V2, DAG))
      return V;

    // Lane-crossing single-input shuffles need a lane permute first since
    // there is no general cross-lane i16 permute without AVX512BW.
    if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
      if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
              DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
        return V;

      return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask,
                                                 DAG, Subtarget);
    }

    SmallVector<int, 8> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
      // As this is a single-input shuffle, the repeated mask is a valid
      // v8i16 mask; reuse the general v8i16 single-input lowering across
      // both lanes.
      return lowerV8I16GeneralSingleInputShuffle(
          DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
    }
  }

  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2,
                                              Zeroable, Subtarget, DAG))
    return PSHUFB;

  // With AVX512BW we have a direct variable cross-lane word permute (VPERMW).
  if (Subtarget.hasBWI())
    return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, Subtarget, DAG);

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // Try a lane permute followed by a per-lane permute.
  if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
          DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
    return V;

  // Otherwise fall back on generic lowering.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
                                    Subtarget, DAG);
}
17684 | |
17685 | |
17686 | |
17687 | |
17688 | |
/// Handle lowering of 32-lane 8-bit integer shuffles (requires AVX2).
///
/// Strategies are tried strictly in order; each returns on first match.
static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
  assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");

  // Whenever we can lower this as a zext, that instruction is strictly
  // faster than any alternative and also allows folding memory operands.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use a truncation-based lowering.
  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v32i8, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use a byte rotation (PALIGNR).
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Try to use bit rotation instructions on a single input.
  if (V2.isUndef())
    if (SDValue Rotate =
            lowerShuffleAsBitRotate(DL, MVT::v32i8, V1, Mask, Subtarget, DAG))
      return Rotate;

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Lane-crossing single-input shuffles need a lane permute first since
  // there is no general cross-lane byte permute without AVX512VBMI.
  if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
    // Try a 256-bit unpack of the single input first.
    if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, Mask, V1, V2, DAG))
      return V;

    if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
            DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
      return V;

    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
                                               DAG, Subtarget);
  }

  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
                                              Zeroable, Subtarget, DAG))
    return PSHUFB;

  // With AVX512VBMI we have a direct variable cross-lane byte permute
  // (VPERMB).
  if (Subtarget.hasVBMI())
    return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, Subtarget, DAG);

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // Try a lane permute followed by a per-lane permute.
  if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
          DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
    return V;

  // With VLX, try lowering as a truncation of two v4i64 inputs followed by
  // an unpack into a zero vector.
  if (Subtarget.hasVLX())
    if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2,
                                                  Mask, Zeroable, DAG))
      return V;

  // Otherwise fall back on generic lowering.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
                                    Subtarget, DAG);
}
17797 | |
17798 | |
17799 | |
17800 | |
17801 | |
17802 | |
/// High-level routine to lower various 256-bit x86 vector shuffles.
///
/// Handles the shared pre-processing (single-element insertion, undef-half
/// handling, and pre-AVX2 integer-to-FP domain bitcasting) and then
/// dispatches to the per-type lowering routine.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, const APInt &Zeroable,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  // If we have a single input to the zero element, insert that into V1 if we
  // can do so cheaply.
  int NumElts = VT.getVectorNumElements();
  int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });

  if (NumV2Elements == 1 && Mask[0] >= NumElts)
    if (SDValue Insertion = lowerShuffleAsElementInsertion(
            DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return Insertion;

  // Handle special cases where the lower or upper half is UNDEF.
  if (SDValue V =
          lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Without AVX2 there are no useful 256-bit integer shuffle instructions,
  // so handle integer types here rather than in the per-type routines:
  // sub-32-bit elements try a bitmask/bit-blend then split into 128-bit
  // halves; 32/64-bit elements are bitcast to the same-width FP type and
  // re-shuffled in the floating point domain.
  if (VT.isInteger() && !Subtarget.hasAVX2()) {
    int ElementBits = VT.getScalarSizeInBits();
    if (ElementBits < 32) {
      // No floating point type available, integer bit-math fallbacks only.
      if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
                                            Subtarget, DAG))
        return V;
      if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
        return V;
      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
    }

    MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
                                VT.getVectorNumElements());
    V1 = DAG.getBitcast(FpVT, V1);
    V2 = DAG.getBitcast(FpVT, V2);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
  }

  // v16f16 shuffles are lowered via the equivalent integer type.
  if (VT == MVT::v16f16) {
    V1 = DAG.getBitcast(MVT::v16i16, V1);
    V2 = DAG.getBitcast(MVT::v16i16, V2);
    return DAG.getBitcast(MVT::v16f16,
                          DAG.getVectorShuffle(MVT::v16i16, DL, V1, V2, Mask));
  }

  switch (VT.SimpleTy) {
  case MVT::v4f64:
    return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v4i64:
    return lowerV4I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v8f32:
    return lowerV8F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v8i32:
    return lowerV8I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v16i16:
    return lowerV16I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v32i8:
    return lowerV32I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);

  default:
    llvm_unreachable("Not a valid 256-bit x86 vector type!");
  }
}
17873 | |
17874 | |
/// Try to lower a 512-bit, 64-bit-element shuffle as a shuffle of whole
/// 128-bit subvectors (insertions, subvector moves, or SHUF128).
/// Returns an empty SDValue if the mask cannot be widened to 128-bit
/// granularity or does not fit any of the handled patterns.
static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(VT.getScalarSizeInBits() == 64 &&
         "Unexpected element type size for 128bit shuffle.");

  // To handle 256 bit vector requires VLX and most probably
  // function lowerV2X128VectorShuffle() is better solution.
  assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");

  // Widen the element mask to a 128-bit-lane mask (2 elements per lane).
  SmallVector<int, 4> Widened128Mask;
  if (!canWidenShuffleElements(Mask, Widened128Mask))
    return SDValue();
  assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");

  // Try to use an insert into a zero vector: the low 128 bits (when elements
  // 2..7 are zeroable, Zeroable mask 0xfc) or low 256 bits (when elements
  // 4..7 are zeroable, 0xf0) of V1, with everything above known zero.
  if (Widened128Mask[0] == 0 && (Zeroable & 0xf0) == 0xf0 &&
      (Widened128Mask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) {
    unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4;
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
                              DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                       getZeroVector(VT, Subtarget, DAG, DL), LoV,
                       DAG.getIntPtrConstant(0, DL));
  }

  // Check for patterns which can be matched with a single insert of a 256-bit
  // subvector: the low half of V1 duplicated, or the low half of V1 followed
  // by the low half of V2.
  bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}, V1, V2);
  if (OnlyUsesV1 ||
      isShuffleEquivalent(Mask, {0, 1, 2, 3, 8, 9, 10, 11}, V1, V2)) {
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
    SDValue SubVec =
        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2,
                    DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,
                       DAG.getIntPtrConstant(4, DL));
  }

  // See if this is an insertion of the low 128 bits of V2 into V1 - i.e.
  // every lane is either in place from V1 or is V2's lane 0, and V2's lane
  // appears at most once.
  bool IsInsert = true;
  int V2Index = -1;
  for (int i = 0; i < 4; ++i) {
    assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
    if (Widened128Mask[i] < 0)
      continue;

    // Make sure all V1 subvectors are in place.
    if (Widened128Mask[i] < 4) {
      if (Widened128Mask[i] != i) {
        IsInsert = false;
        break;
      }
    } else {
      // Make sure we only have a single V2 index and its the lowest 128-bits.
      if (V2Index >= 0 || Widened128Mask[i] != 4) {
        IsInsert = false;
        break;
      }
      V2Index = i;
    }
  }
  if (IsInsert && V2Index >= 0) {
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
    SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
                                 DAG.getIntPtrConstant(0, DL));
    return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
  }

  // If the mask can be widened even further (to 256-bit granularity),
  // canonicalize it back to 128-bit granularity via the widened form so
  // undef lanes are filled in consistently before building the SHUF128
  // immediate below.
  SmallVector<int, 2> Widened256Mask;
  if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
    Widened128Mask.clear();
    narrowShuffleMaskElts(2, Widened256Mask, Widened128Mask);
  }

  // Try to lower to vshuf64x2/vshuf32x4. Each half of the result may only
  // draw from a single source operand.
  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
  unsigned PermMask = 0;
  // Insure elements came from the same Op.
  for (int i = 0; i < 4; ++i) {
    assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
    if (Widened128Mask[i] < 0)
      continue;

    SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1;
    unsigned OpIndex = i / 2;
    if (Ops[OpIndex].isUndef())
      Ops[OpIndex] = Op;
    else if (Ops[OpIndex] != Op)
      return SDValue();

    // Convert the 128-bit shuffle mask selection values into 128-bit
    // selection bits defined by a vshuf64x2 instruction's immediate control
    // byte: two bits per result lane.
    PermMask |= (Widened128Mask[i] % 4) << (i * 2);
  }

  return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
                     DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
17981 | |
17982 | |
/// Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  if (V2.isUndef()) {
    // Use low duplicate instructions for masks that match their pattern.
    if (isShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1, V2))
      return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);

    if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) {
      // Non-half-crossing single input shuffles can be lowered with an
      // interleaved permutation: bit i of the immediate selects the odd
      // element of the 128-bit pair containing lane i.
      unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
                              ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
                              ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
                              ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
                         DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
    }

    SmallVector<int, 4> RepeatedMask;
    if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask))
      return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
  }

  if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1,
                                           V2, Subtarget, DAG))
    return Shuf128;

  if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
    return Unpck;

  // Check if the blend happens to exactly fit that of SHUFPD.
  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Op;

  // With AVX-512 we can use VEXPAND for zero-fill expansion patterns.
  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
                                       DAG, Subtarget))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Fall back to the fully general variable permute.
  return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, Subtarget, DAG);
}
18035 | |
18036 | |
/// Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

  // If the shuffle mask is repeated in each 128-bit lane, we have many more
  // options than simply relying on a cross-lane permute.
  SmallVector<int, 4> RepeatedMask;
  if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) {
    assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");

    // Use even/odd duplicate instructions for masks that match their pattern.
    if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
      return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1);
    if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
      return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1);

    if (V2.isUndef())
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
      return V;

    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

    // Otherwise, fall back to a SHUFPS sequence.
    return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
  }

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG))
    return V;

  // If we have a single input shuffle with different shuffle patterns in the
  // 128-bit lanes but don't lane cross, use a variable mask VPERMILPS.
  if (V2.isUndef() &&
      !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
    SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true);
    return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask);
  }

  // With AVX-512 we can use VEXPAND for zero-fill expansion patterns.
  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
                                       V1, V2, DAG, Subtarget))
    return V;

  // Fall back to the fully general variable permute.
  return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, Subtarget, DAG);
}
18094 | |
18095 | |
/// Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  if (V2.isUndef()) {
    // When the shuffle is mirrored in each 128-bit lane we can use cheaper
    // lane-local instructions: widen the v8i64 mask to the equivalent v16i32
    // mask and emit a single PSHUFD over a bitcast.
    SmallVector<int, 2> Repeated128Mask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
      SmallVector<int, 4> PSHUFDMask;
      narrowShuffleMaskElts(2, Repeated128Mask, PSHUFDMask);
      return DAG.getBitcast(
          MVT::v8i64,
          DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
                      DAG.getBitcast(MVT::v16i32, V1),
                      getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
    }

    // A mask repeated in both 256-bit halves maps directly to VPERMQ.
    SmallVector<int, 4> Repeated256Mask;
    if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask))
      return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1,
                         getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
  }

  if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1,
                                           V2, Subtarget, DAG))
    return Shuf128;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use VALIGN.
  if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i64, V1, V2, Mask,
                                            Subtarget, DAG))
    return Rotate;

  // Try to use PALIGNR (byte rotation) — only available with BWI at 512 bits.
  if (Subtarget.hasBWI())
    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
                                                  Subtarget, DAG))
      return Rotate;

  if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
    return Unpck;

  // With AVX-512 we can use VEXPAND for zero-fill expansion patterns.
  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2,
                                       DAG, Subtarget))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Fall back to the fully general variable permute.
  return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, Subtarget, DAG);
}
18159 | |
18160 | |
/// Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

  // Whenever we can lower this as a zext, that instruction is strictly faster
  // than any alternative. It also allows us to fold memory operands into the
  // shuffle in many cases.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
          DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return ZExt;

  // If the shuffle mask is repeated in each 128-bit lane we can use cheaper
  // lane-local instructions that mirror the shuffle across all four lanes.
  SmallVector<int, 4> RepeatedMask;
  bool Is128BitLaneRepeatedShuffle =
      is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask);
  if (Is128BitLaneRepeatedShuffle) {
    assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
    if (V2.isUndef())
      return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
      return V;
  }

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use VALIGN.
  if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask,
                                            Subtarget, DAG))
    return Rotate;

  // Try to use byte rotation instructions (requires BWI at 512 bits).
  if (Subtarget.hasBWI())
    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask,
                                                  Subtarget, DAG))
      return Rotate;

  // Assume that a single SHUFPS (on a float bitcast) is faster than a variable
  // cross-lane permute, even though it switches execution domains.
  if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
    SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
    SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
    SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
                                            CastV1, CastV2, DAG);
    return DAG.getBitcast(MVT::v16i32, ShufPS);
  }

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
    return V;

  // With AVX-512 we can use VEXPAND for zero-fill expansion patterns.
  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2,
                                       DAG, Subtarget))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Fall back to the fully general variable permute.
  return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, Subtarget, DAG);
}
18236 | |
18237 | |
/// Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
  assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
  assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");

  // Whenever we can lower this as a zext, that instruction is strictly faster
  // than any alternative. It also allows us to fold memory operands into the
  // shuffle in many cases.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
          DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return ZExt;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V =
          lowerShuffleWithPACK(DL, MVT::v32i16, Mask, V1, V2, DAG, Subtarget))
    return V;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  if (V2.isUndef()) {
    // Try to use bit rotation instructions.
    if (SDValue Rotate =
            lowerShuffleAsBitRotate(DL, MVT::v32i16, V1, Mask, Subtarget, DAG))
      return Rotate;

    SmallVector<int, 8> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
      // As this is a single-input shuffle, the repeated mask should be a
      // strictly valid v8i16 mask that we can pass through to the v8i16
      // lowering to handle even the v32 case.
      return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1,
                                                 RepeatedMask, Subtarget, DAG);
    }
  }

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2,
                                              Zeroable, Subtarget, DAG))
    return PSHUFB;

  // Fall back to the fully general variable permute.
  return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, Subtarget, DAG);
}
18299 | |
18300 | |
/// Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
  assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
  assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");

  // Whenever we can lower this as a zext, that instruction is strictly faster
  // than any alternative. It also allows us to fold memory operands into the
  // shuffle in many cases.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
          DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return ZExt;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Try to use bit rotation instructions.
  if (V2.isUndef())
    if (SDValue Rotate =
            lowerShuffleAsBitRotate(DL, MVT::v64i8, V1, Mask, Subtarget, DAG))
      return Rotate;

  // Lower as AND if possible.
  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v64i8, V1, V2, Mask,
                                             Zeroable, Subtarget, DAG))
    return Masked;

  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2,
                                              Zeroable, Subtarget, DAG))
    return PSHUFB;

  // VBMI can use VPERMV/VPERMV3 byte shuffles.
  if (Subtarget.hasVBMI())
    return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle.
  if (!V2.isUndef())
    if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
            DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
      return Result;

  // FIXME: Implement direct support for this type!
  return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
18375 | |
18376 | |
18377 | |
18378 | |
18379 | |
18380 | |
/// High-level lowering routine for all [8 x i64], [16 x i32], [32 x i16] and
/// [64 x i8] vector shuffles.
///
/// Performs the common pre-processing shared by all 512-bit element types and
/// then dispatches to the element-type-specific routines above.
static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  MVT VT, SDValue V1, SDValue V2,
                                  const APInt &Zeroable,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(Subtarget.hasAVX512() &&
         "Cannot lower 512-bit vectors w/ basic ISA!");

  // If we have a single input to the zero element, insert that into V1 if we
  // can do so cheaply.
  int NumElts = Mask.size();
  int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });

  if (NumV2Elements == 1 && Mask[0] >= NumElts)
    if (SDValue Insertion = lowerShuffleAsElementInsertion(
            DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return Insertion;

  // Handle special cases where the lower or upper half is UNDEF.
  if (SDValue V =
          lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {
    // Without BWI there is no instruction that can shuffle these element
    // types directly; try bitwise masking/blending, then split into two
    // 256-bit halves.
    if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
                                          Subtarget, DAG))
      return V;
    if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
      return V;

    return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
  }

  // v32f16 has no dedicated shuffle lowering: perform the shuffle on the
  // bit-identical v32i16 type instead.
  if (VT == MVT::v32f16) {
    V1 = DAG.getBitcast(MVT::v32i16, V1);
    V2 = DAG.getBitcast(MVT::v32i16, V2);
    return DAG.getBitcast(MVT::v32f16,
                          DAG.getVectorShuffle(MVT::v32i16, DL, V1, V2, Mask));
  }

  // Dispatch to each element type for lowering. If we don't have support for
  // specific element type shuffles at 512 bits, immediately split them and
  // lower them. Each lowering routine of a given type is allowed to assume
  // that the requisite ISA extensions for that element type are available.
  switch (VT.SimpleTy) {
  case MVT::v8f64:
    return lowerV8F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v16f32:
    return lowerV16F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v8i64:
    return lowerV8I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v16i32:
    return lowerV16I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v32i16:
    return lowerV32I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v64i8:
    return lowerV64I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);

  default:
    llvm_unreachable("Not a valid 512-bit x86 vector type!");
  }
}
18450 | |
/// Try to lower a unary i1-vector shuffle as a KSHIFTR: every defined mask
/// element must read from V1 at a constant positive offset (Mask[i] == i +
/// ShiftAmt for all non-undef i).
static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
  // Shuffle should be unary.
  if (!V2.isUndef())
    return SDValue();

  int ShiftAmt = -1;
  int NumElts = Mask.size();
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // The first non-undef element determines our shift amount.
    if (ShiftAmt < 0) {
      ShiftAmt = M - i;
      // Need to be shifting right.
      if (ShiftAmt <= 0)
        return SDValue();
    }
    // All non-undef elements must shift by the same amount.
    if (ShiftAmt != M - i)
      return SDValue();
  }
  assert(ShiftAmt >= 0 && "All undef?");

  // Great, we found a shift right. Narrow mask registers are shifted as the
  // minimum legal width (v8i1 with DQI, v16i1 otherwise), so widen, shift and
  // extract the original width back out.
  MVT WideVT = VT;
  if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
    WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
  SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
                            DAG.getUNDEF(WideVT), V1,
                            DAG.getIntPtrConstant(0, DL));
  Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
                    DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                     DAG.getIntPtrConstant(0, DL));
}
18493 | |
18494 | |
18495 | |
18496 | |
18497 | static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask, |
18498 | int MaskOffset, const APInt &Zeroable) { |
18499 | int Size = Mask.size(); |
18500 | |
18501 | auto CheckZeros = [&](int Shift, bool Left) { |
18502 | for (int j = 0; j < Shift; ++j) |
18503 | if (!Zeroable[j + (Left ? 0 : (Size - Shift))]) |
18504 | return false; |
18505 | |
18506 | return true; |
18507 | }; |
18508 | |
18509 | auto MatchShift = [&](int Shift, bool Left) { |
18510 | unsigned Pos = Left ? Shift : 0; |
18511 | unsigned Low = Left ? 0 : Shift; |
18512 | unsigned Len = Size - Shift; |
18513 | return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset); |
18514 | }; |
18515 | |
18516 | for (int Shift = 1; Shift != Size; ++Shift) |
18517 | for (bool Left : {true, false}) |
18518 | if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) { |
18519 | Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR; |
18520 | return Shift; |
18521 | } |
18522 | |
18523 | return -1; |
18524 | } |
18525 | |
18526 | |
18527 | |
18528 | |
18529 | |
18530 | |
/// Lower a vector shuffle of i1 mask registers. There are no native k-register
/// shuffle instructions, so first try subvector/shift tricks on the mask
/// registers themselves, and otherwise sign-extend into a legal integer vector
/// type, shuffle there, and convert back to a mask.
static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                MVT VT, SDValue V1, SDValue V2,
                                const APInt &Zeroable,
                                const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  assert(Subtarget.hasAVX512() &&
         "Cannot lower 512-bit vectors w/o basic ISA!");

  int NumElts = Mask.size();

  // Try to recognize shuffles that are just padding a subvector with zeros:
  // count how many leading elements form an identity copy from one source.
  int SubvecElts = 0;
  int Src = -1;
  for (int i = 0; i != NumElts; ++i) {
    if (Mask[i] >= 0) {
      // Grab the source from the first valid mask. All subsequent elements
      // need to use this same source.
      if (Src < 0)
        Src = Mask[i] / NumElts;
      if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i)
        break;
    }

    ++SubvecElts;
  }
  assert(SubvecElts != NumElts && "Identity shuffle?");

  // Clip to a power 2 so we can form a legal extract subvector type.
  SubvecElts = PowerOf2Floor(SubvecElts);

  // Make sure the number of zeroable bits in the top at least covers the bits
  // not covered by the subvector.
  if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
    assert(Src >= 0 && "Expected a source!");
    MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts);
    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
                                  Src == 0 ? V1 : V2,
                                  DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       Extract, DAG.getIntPtrConstant(0, DL));
  }

  // Try a shift right with undef elements shifted in.
  if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget,
                                                DAG))
    return Shift;

  // Try a KSHIFTL/KSHIFTR against each source in turn (Offset selects which
  // source's indices the match helper looks for).
  unsigned Offset = 0;
  for (SDValue V : { V1, V2 }) {
    unsigned Opcode;
    int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
    if (ShiftAmt >= 0) {
      // Narrow masks are shifted at the minimum legal width; widen first.
      MVT WideVT = VT;
      if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
        WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
      SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
                                DAG.getUNDEF(WideVT), V,
                                DAG.getIntPtrConstant(0, DL));
      // Widened right shifts need two shifts to ensure we shift in zeroes:
      // first move the vector into the MSBs of the wide type, then shift
      // right by the adjusted amount.
      if (Opcode == X86ISD::KSHIFTR && WideVT != VT) {
        int WideElts = WideVT.getVectorNumElements();
        // Shift left to put the original vector in the MSBs of the new size.
        Res =
            DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res,
                        DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8));
        // Increase the shift amount to account for the left shift.
        ShiftAmt += WideElts - NumElts;
      }

      Res = DAG.getNode(Opcode, DL, WideVT, Res,
                        DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                         DAG.getIntPtrConstant(0, DL));
    }
    Offset += NumElts;
  }

  // No mask-register trick matched: sign-extend into an integer vector type
  // wide enough to shuffle natively, then convert back to a mask.
  MVT ExtVT;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Expected a vector of i1 elements");
  case MVT::v2i1:
    ExtVT = MVT::v2i64;
    break;
  case MVT::v4i1:
    ExtVT = MVT::v4i32;
    break;
  case MVT::v8i1:
    // Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit
    // shuffle.
    ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64;
    break;
  case MVT::v16i1:
    // Take 512-bit type, unless we are avoiding 512-bit types and have the
    // 256-bit operation available.
    ExtVT = Subtarget.canExtendTo512DQ() ? MVT::v16i32 : MVT::v16i16;
    break;
  case MVT::v32i1:
    // Take 512-bit type, unless we are avoiding 512-bit types and have the
    // 256-bit operation available.
    assert(Subtarget.hasBWI() && "Expected AVX512BW support");
    ExtVT = Subtarget.canExtendTo512BW() ? MVT::v32i16 : MVT::v32i8;
    break;
  case MVT::v64i1:
    // Fall back to scalarization. FIXME: We can do better if the shuffle
    // can be partitioned cleanly.
    if (!Subtarget.useBWIRegs())
      return SDValue();
    ExtVT = MVT::v64i8;
    break;
  }

  V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
  V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);

  SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask);
  // Convert the sign-extended result back to a mask: use a "0 > x" compare
  // when a native compare-to-mask exists for this width, otherwise truncate.
  int NumElems = VT.getVectorNumElements();
  if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
      (Subtarget.hasDQI() && (NumElems < 32)))
    return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT),
                        Shuffle, ISD::SETGT);

  return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
18659 | |
18660 | |
18661 | |
18662 | static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) { |
18663 | int NumElements = Mask.size(); |
18664 | |
18665 | int NumV1Elements = 0, NumV2Elements = 0; |
18666 | for (int M : Mask) |
18667 | if (M < 0) |
18668 | continue; |
18669 | else if (M < NumElements) |
18670 | ++NumV1Elements; |
18671 | else |
18672 | ++NumV2Elements; |
18673 | |
18674 | |
18675 | |
18676 | |
18677 | if (NumV2Elements > NumV1Elements) |
18678 | return true; |
18679 | |
18680 | assert(NumV1Elements > 0 && "No V1 indices"); |
18681 | |
18682 | if (NumV2Elements == 0) |
18683 | return false; |
18684 | |
18685 | |
18686 | |
18687 | |
18688 | |
18689 | |
18690 | if (NumV1Elements == NumV2Elements) { |
18691 | int LowV1Elements = 0, LowV2Elements = 0; |
18692 | for (int M : Mask.slice(0, NumElements / 2)) |
18693 | if (M >= NumElements) |
18694 | ++LowV2Elements; |
18695 | else if (M >= 0) |
18696 | ++LowV1Elements; |
18697 | if (LowV2Elements > LowV1Elements) |
18698 | return true; |
18699 | if (LowV2Elements == LowV1Elements) { |
18700 | int SumV1Indices = 0, SumV2Indices = 0; |
18701 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
18702 | if (Mask[i] >= NumElements) |
18703 | SumV2Indices += i; |
18704 | else if (Mask[i] >= 0) |
18705 | SumV1Indices += i; |
18706 | if (SumV2Indices < SumV1Indices) |
18707 | return true; |
18708 | if (SumV2Indices == SumV1Indices) { |
18709 | int NumV1OddIndices = 0, NumV2OddIndices = 0; |
18710 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
18711 | if (Mask[i] >= NumElements) |
18712 | NumV2OddIndices += i % 2; |
18713 | else if (Mask[i] >= 0) |
18714 | NumV1OddIndices += i % 2; |
18715 | if (NumV2OddIndices < NumV1OddIndices) |
18716 | return true; |
18717 | } |
18718 | } |
18719 | } |
18720 | |
18721 | return false; |
18722 | } |
18723 | |
18724 | |
18725 | static SDValue canonicalizeShuffleMaskWithHorizOp( |
18726 | MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask, |
18727 | unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG, |
18728 | const X86Subtarget &Subtarget); |
18729 | |
18730 | |
18731 | |
18732 | |
18733 | |
18734 | |
18735 | |
18736 | |
/// Top-level lowering for x86 vector shuffles.
///
/// Performs source-independent canonicalization (undef folding, zero
/// detection, element widening, operand commutation) and then dispatches to
/// the width-specific lowering routines.
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);
  bool Is1BitVector = (VT.getVectorElementType() == MVT::i1);

  assert((VT.getSizeInBits() != 64 || Is1BitVector) &&
         "Can't lower MMX shuffles");

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node to second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask.begin(), OrigMask.end());
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
  }

  // Check for illegal shuffle mask element index values.
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // We actually see shuffles that are entirely re-arrangements of a set of
  // zero inputs. This mostly happens while decomposing complex shuffles into
  // simple ones. Directly lower these as a buildvector of zeros.
  APInt KnownUndef, KnownZero;
  computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);

  APInt Zeroable = KnownUndef | KnownZero;
  if (Zeroable.isAllOnesValue())
    return getZeroVector(VT, Subtarget, DAG, DL);

  bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());

  // Try to collapse shuffles into using a vector type with fewer elements but
  // wider element types. We cap this to not form integers or floating point
  // elements wider than 64 bits.
  SmallVector<int, 16> WidenedMask;
  if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
      canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) {
    // Shuffle mask widening should not interfere with a broadcast opportunity
    // by obfuscating the operands with bitcasts, so check broadcasts first.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,
                                                    Subtarget, DAG))
      return Broadcast;

    MVT NewEltVT = VT.isFloatingPoint()
                       ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
                       : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
    int NewNumElts = NumElements / 2;
    MVT NewVT = MVT::getVectorVT(NewEltVT, NewNumElts);
    // Make sure that the new vector type is legal before re-lowering with it.
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      if (V2IsZero) {
        // Modify the new Mask to take all zeros from the all-zero vector.
        // Choose indices that are blend-friendly.
        bool UsedZeroVector = false;
        assert(is_contained(WidenedMask, SM_SentinelZero) &&
               "V2's non-undef elements are used?!");
        for (int i = 0; i != NewNumElts; ++i)
          if (WidenedMask[i] == SM_SentinelZero) {
            WidenedMask[i] = i + NewNumElts;
            UsedZeroVector = true;
          }

        // Ensure all elements of V2 are zero - isBuildVectorAllZeros permits
        // some elements to be undef.
        if (UsedZeroVector)
          V2 = getZeroVector(NewVT, Subtarget, DAG, DL);
      }
      V1 = DAG.getBitcast(NewVT, V1);
      V2 = DAG.getBitcast(NewVT, V2);
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask));
    }
  }

  SmallVector<SDValue> Ops = {V1, V2};
  SmallVector<int> Mask(OrigMask.begin(), OrigMask.end());

  // Canonicalize the shuffle with any horizontal ops inputs.
  // NOTE: This may update Ops and Mask.
  if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
          Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
    return DAG.getBitcast(VT, HOp);

  V1 = DAG.getBitcast(VT, Ops[0]);
  V2 = DAG.getBitcast(VT, Ops[1]);
  assert(NumElements == (int)Mask.size() &&
         "canonicalizeShuffleMaskWithHorizOp "
         "shouldn't alter the shuffle mask size");

  // Commute the shuffle if it will improve canonicalization.
  if (canonicalizeShuffleMaskWithCommute(Mask)) {
    ShuffleVectorSDNode::commuteMask(Mask);
    std::swap(V1, V2);
  }

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);

  if (VT.is512BitVector())
    return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);

  if (Is1BitVector)
    return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);

  llvm_unreachable("Unimplemented!");
}
18875 | |
18876 | |
18877 | static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, |
18878 | const X86Subtarget &Subtarget, |
18879 | SelectionDAG &DAG) { |
18880 | SDValue Cond = Op.getOperand(0); |
18881 | SDValue LHS = Op.getOperand(1); |
18882 | SDValue RHS = Op.getOperand(2); |
18883 | MVT VT = Op.getSimpleValueType(); |
18884 | |
18885 | |
18886 | |
18887 | if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { |
18888 | SmallVector<int, 32> Mask; |
18889 | if (createShuffleMaskFromVSELECT(Mask, Cond)) |
18890 | return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); |
18891 | } |
18892 | |
18893 | return SDValue(); |
18894 | } |
18895 | |
/// Lower a VSELECT node to a legal blend form, or signal that it should be
/// expanded (null return) / is already legal (returns Op unchanged).
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);

  // A vselect where everything is constant folds to a constant result; let
  // the generic code handle it rather than lowering to a blend here.
  if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
      ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
      ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
    return SDValue();

  // Try to lower this to a blend-style vector shuffle. This can handle all
  // constant condition cases.
  if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
    return BlendOp;

  // A vselect with an i1 condition vector is matched directly against the
  // AVX-512 mask-register patterns; leave it alone.
  MVT CondVT = Cond.getSimpleValueType();
  unsigned CondEltSize = Cond.getScalarValueSizeInBits();
  if (CondEltSize == 1)
    return Op;

  // Variable blends require SSE4.1; anything older must be expanded.
  if (!Subtarget.hasSSE41())
    return SDValue();

  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  unsigned EltSize = VT.getScalarSizeInBits();
  unsigned NumElts = VT.getVectorNumElements();

  // Expand v32i16/v64i8 without BWI.
  if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
    return SDValue();

  // 512-bit blends only exist in mask-register form; convert the non-i1
  // condition into a vXi1 mask by comparing against zero, then reissue the
  // select with the mask.
  if (VT.getSizeInBits() == 512) {
    // Build the i1 mask: lane is selected iff its condition element != 0.
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
    SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
                                DAG.getConstant(0, dl, CondVT),
                                ISD::SETNE);
    // Re-emit as a mask-based select; the i1-condition path above will keep
    // it legal on the next visit.
    return DAG.getSelect(dl, VT, Mask, LHS, RHS);
  }

  // Condition and result element sizes differ: resize the condition to match.
  if (CondEltSize != EltSize) {
    // Only safe if every condition element is already all-sign-bits (a
    // sext'd boolean); otherwise resizing would change lane semantics.
    if (CondEltSize != DAG.ComputeNumSignBits(Cond))
      return SDValue();

    MVT NewCondSVT = MVT::getIntegerVT(EltSize);
    MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts);
    Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT);
    return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS);
  }

  // Only some types will be legal on some subtargets. If we can emit a legal
  // VSELECT-matching blend, return Op, otherwise return a null SDValue and
  // let the expansion happen.
  switch (VT.SimpleTy) {
  default:
    // Most of the vector types have blends past SSE4.1.
    return Op;

  case MVT::v32i8:
    // The 256-bit byte blend is only available with AVX2.
    if (Subtarget.hasAVX2())
      return Op;

    return SDValue();

  case MVT::v8i16:
  case MVT::v16i16: {
    // Bitcast everything to the vXi8 type and use a vXi8 vselect; each i16
    // lane becomes two identical i8 lanes, so the blend result is unchanged.
    MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2);
    Cond = DAG.getBitcast(CastVT, Cond);
    LHS = DAG.getBitcast(CastVT, LHS);
    RHS = DAG.getBitcast(CastVT, RHS);
    SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS);
    return DAG.getBitcast(VT, Select);
  }
  }
}
18985 | |
/// SSE4.1-specific lowering for EXTRACT_VECTOR_ELT from a 128-bit source.
/// Returns Op when already legal, a replacement DAG when profitable, or a
/// null SDValue to fall back to the generic path.
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  assert(isa<ConstantSDNode>(Idx) && "Constant index expected");
  SDLoc dl(Op);

  if (!Vec.getSimpleValueType().is128BitVector())
    return SDValue();

  if (VT.getSizeInBits() == 8) {
    // Extracting byte 0 is cheaper as a plain dword move + truncate than a
    // PEXTRB, unless the extract is about to be folded into a zero-extend or
    // a store (where PEXTRB folds better).
    if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
        !MayFoldIntoStore(Op))
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                     DAG.getBitcast(MVT::v4i32, Vec), Idx));

    // PEXTRB produces a 32-bit result; truncate to the requested width.
    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
    SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec,
                                  DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
  }

  if (VT == MVT::f32) {
    // An f32 extract is only rewritten through an i32 extract when its single
    // user is either a store of a non-zero lane or a bitcast to i32 — cases
    // where the integer form can be matched/folded directly. Otherwise leave
    // it for shuffle-based lowering.
    if (!Op.hasOneUse())
      return SDValue();
    SDNode *User = *Op.getNode()->use_begin();
    if ((User->getOpcode() != ISD::STORE || isNullConstant(Idx)) &&
        (User->getOpcode() != ISD::BITCAST ||
         User->getValueType(0) != MVT::i32))
      return SDValue();
    // Extract the lane as i32 and bitcast the result back to f32.
    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                  DAG.getBitcast(MVT::v4i32, Vec), Idx);
    return DAG.getBitcast(MVT::f32, Extract);
  }

  // i32/i64 extracts with a constant index are already legal on SSE4.1
  // (PEXTRD/PEXTRQ patterns).
  if (VT == MVT::i32 || VT == MVT::i64)
    return Op;

  return SDValue();
}
19034 | |
19035 | |
19036 | |
/// Extract one bit from a vXi1 mask vector (AVX-512 k-register value) as a
/// scalar.
static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  SDValue Vec = Op.getOperand(0);
  SDLoc dl(Vec);
  MVT VecVT = Vec.getSimpleValueType();
  SDValue Idx = Op.getOperand(1);
  auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
  MVT EltVT = Op.getSimpleValueType();

  assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
         "Unexpected vector type in ExtractBitFromMaskVector");

  // A variable index cannot be serviced in mask registers: sign-extend the
  // mask to a regular integer vector, extract the lane there, then truncate
  // back down to i1.
  if (!IdxC) {
    unsigned NumElts = VecVT.getVectorNumElements();
    // For <= 8 elements widen to a 128-bit vector of 128/NumElts-bit
    // elements; for larger masks use vXi8.
    MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
    MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx);
    return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
  }

  unsigned IdxVal = IdxC->getZExtValue();
  if (IdxVal == 0) // Extracting bit 0 is already legal.
    return Op;

  // Widen narrow masks to a type with a natively supported KSHIFT
  // (v8i1 needs DQI; anything smaller is widened to v8i1/v16i1).
  unsigned NumElems = VecVT.getVectorNumElements();
  MVT WideVecVT = VecVT;
  if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
    WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
                      DAG.getUNDEF(WideVecVT), Vec,
                      DAG.getIntPtrConstant(0, dl));
  }

  // Shift the wanted bit down to position 0 with KSHIFTR...
  Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
                    DAG.getTargetConstant(IdxVal, dl, MVT::i8));

  // ...and extract bit 0, which is legal (see above).
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
                     DAG.getIntPtrConstant(0, dl));
}
19083 | |
/// Custom lowering for EXTRACT_VECTOR_ELT. Returns Op when already legal,
/// a replacement DAG otherwise, or a null SDValue to request expansion.
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  MVT VecVT = Vec.getSimpleValueType();
  SDValue Idx = Op.getOperand(1);
  auto* IdxC = dyn_cast<ConstantSDNode>(Idx);

  // vXi1 mask vectors take the dedicated mask-register path.
  if (VecVT.getVectorElementType() == MVT::i1)
    return ExtractBitFromMaskVector(Op, DAG, Subtarget);

  if (!IdxC) {
    // Variable element index: nothing profitable to do here; returning a
    // null value lets the generic legalizer expand the extract (typically
    // through a stack temporary).
    return SDValue();
  }

  unsigned IdxVal = IdxC->getZExtValue();

  // For 256/512-bit sources, first extract the 128-bit chunk that holds the
  // element, then extract from that chunk.
  if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
    // Get the containing 128-bit subvector.
    Vec = extract128BitVector(Vec, IdxVal, DAG, dl);
    MVT EltVT = VecVT.getVectorElementType();

    unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
    assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

    // Reduce the index into the chunk; ElemsPerChunk is a power of two, so
    // masking replaces the modulo.
    IdxVal &= ElemsPerChunk - 1;
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
                       DAG.getIntPtrConstant(IdxVal, dl));
  }

  assert(VecVT.is128BitVector() && "Unexpected vector length");

  MVT VT = Op.getSimpleValueType();

  if (VT == MVT::i16) {
    // Extracting lane 0 is cheaper as a dword move + truncate than PEXTRW,
    // unless the extract will fold into a zero-extend or (with SSE4.1) a
    // store.
    if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
        !(Subtarget.hasSSE41() && MayFoldIntoStore(Op))) {
      // With FP16 the i16 extract itself is fine as-is.
      if (Subtarget.hasFP16())
        return Op;

      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                     DAG.getBitcast(MVT::v4i32, Vec), Idx));
    }

    // PEXTRW produces a 32-bit result; truncate to i16.
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec,
                                  DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
  }

  if (Subtarget.hasSSE41())
    if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
      return Res;

  // Byte extract without SSE4.1's PEXTRB: when this extract is the vector's
  // only user, pull out a containing i32/i16 lane instead and shift/truncate
  // the wanted byte out of it.
  if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
    // Prefer the low dword when the byte lives there.
    int DWordIdx = IdxVal / 4;
    if (DWordIdx == 0) {
      SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                DAG.getBitcast(MVT::v4i32, Vec),
                                DAG.getIntPtrConstant(DWordIdx, dl));
      int ShiftVal = (IdxVal % 4) * 8;
      if (ShiftVal != 0)
        Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
                          DAG.getConstant(ShiftVal, dl, MVT::i8));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    }

    // Otherwise extract the containing word and shift the byte into place.
    int WordIdx = IdxVal / 2;
    SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
                              DAG.getBitcast(MVT::v8i16, Vec),
                              DAG.getIntPtrConstant(WordIdx, dl));
    int ShiftVal = (IdxVal % 2) * 8;
    if (ShiftVal != 0)
      Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
                        DAG.getConstant(ShiftVal, dl, MVT::i8));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
  }

  if (VT == MVT::f16 || VT.getSizeInBits() == 32) {
    // Lane 0 extracts are legal as-is.
    if (IdxVal == 0)
      return Op;

    // Shuffle the wanted element down to lane 0, then extract lane 0; this
    // lets shuffle lowering pick the best strategy for the subtarget.
    SmallVector<int, 8> Mask(VecVT.getVectorNumElements(), -1);
    Mask[0] = static_cast<int>(IdxVal);
    Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
                       DAG.getIntPtrConstant(0, dl));
  }

  if (VT.getSizeInBits() == 64) {
    // Lane 0 extracts are legal as-is.
    if (IdxVal == 0)
      return Op;

    // With 64-bit elements in a 128-bit source, a non-zero index can only be
    // 1: move the high element down to lane 0 with a shuffle and extract
    // lane 0.
    int Mask[2] = { 1, -1 };
    Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
                       DAG.getIntPtrConstant(0, dl));
  }

  return SDValue();
}
19233 | |
19234 | |
19235 | |
/// Insert one bit into a vXi1 mask vector (AVX-512 k-register value).
static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  MVT VecVT = Vec.getSimpleValueType();

  if (!isa<ConstantSDNode>(Idx)) {
    // Non-constant index: sign-extend both the mask and the new bit into a
    // regular integer vector, insert there, then truncate back to vXi1.
    unsigned NumElts = VecVT.getVectorNumElements();
    // For <= 8 elements widen to a 128-bit vector; otherwise use vXi8.
    MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
    MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
    SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
                                DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
                                DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
    return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
  }

  // Constant index: wrap the bit in a v1i1 and insert it as a subvector,
  // which the vXi1 insert_subvector lowering knows how to handle.
  SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec, Idx);
}
19260 | |
/// Custom lowering for INSERT_VECTOR_ELT. Returns Op when already legal,
/// a replacement DAG otherwise, or a null SDValue to request expansion.
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = EltVT.getScalarSizeInBits();

  // vXi1 mask vectors take the dedicated mask-register path.
  if (EltVT == MVT::i1)
    return InsertBitToMaskVector(Op, DAG, Subtarget);

  SDLoc dl(Op);
  SDValue N0 = Op.getOperand(0); // destination vector
  SDValue N1 = Op.getOperand(1); // scalar element to insert
  SDValue N2 = Op.getOperand(2); // insertion index
  auto *N2C = dyn_cast<ConstantSDNode>(N2);

  if (!N2C) {
    // Variable index: lower as a per-lane select keyed on an index compare,
    // but only when the subtarget has wide-enough compares/blends for this
    // element type.
    if (!(Subtarget.hasBWI() ||
          (Subtarget.hasAVX512() && EltSizeInBits >= 32) ||
          (Subtarget.hasSSE41() && VT.isFloatingPoint())))
      return SDValue();

    MVT IdxSVT = MVT::getIntegerVT(EltSizeInBits);
    MVT IdxVT = MVT::getVectorVT(IdxSVT, NumElts);
    if (!isTypeLegal(IdxSVT) || !isTypeLegal(IdxVT))
      return SDValue();

    SDValue IdxExt = DAG.getZExtOrTrunc(N2, dl, IdxSVT);
    SDValue IdxSplat = DAG.getSplatBuildVector(IdxVT, dl, IdxExt);
    SDValue EltSplat = DAG.getSplatBuildVector(VT, dl, N1);

    // Constant vector {0, 1, 2, ...} of lane indices.
    SmallVector<SDValue, 16> RawIndices;
    for (unsigned I = 0; I != NumElts; ++I)
      RawIndices.push_back(DAG.getConstant(I, dl, IdxSVT));
    SDValue Indices = DAG.getBuildVector(IdxVT, dl, RawIndices);

    // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2,...}) ? SplatN1 : N0.
    return DAG.getSelectCC(dl, IdxSplat, Indices, EltSplat, N0,
                           ISD::CondCode::SETEQ);
  }

  // Out-of-range constant index: give up and let the generic code deal
  // with it.
  if (N2C->getAPIntValue().uge(NumElts))
    return SDValue();
  uint64_t IdxVal = N2C->getZExtValue();

  bool IsZeroElt = X86::isZeroNode(N1);
  bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);

  // Inserting a constant 0 or -1 element can be done by blending with a
  // zero/ones vector (for 16-bit+ elements, or byte zeroing of non-128-bit
  // vectors).
  if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
      (16 <= EltSizeInBits || (IsZeroElt && !VT.is128BitVector()))) {
    SmallVector<int, 8> BlendMask;
    for (unsigned i = 0; i != NumElts; ++i)
      BlendMask.push_back(i == IdxVal ? i + NumElts : i);
    SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
                                  : getOnesVector(VT, DAG, dl);
    return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
  }

  // For wide vectors, route the insertion through the relevant 128-bit lane.
  if (VT.is256BitVector() || VT.is512BitVector()) {
    // Inserting into lane 0 of a 256-bit vector can be a single BLENDI of a
    // scalar_to_vector of the new element, when the subtarget has a suitable
    // blend for the element type.
    if (VT.is256BitVector() && IdxVal == 0) {
      if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
          (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
        SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
        return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
                           DAG.getTargetConstant(1, dl, MVT::i8));
      }
    }

    unsigned NumEltsIn128 = 128 / EltSizeInBits;
    assert(isPowerOf2_32(NumEltsIn128) &&
           "Vectors will always have power-of-two number of elements.");

    // When inserting above the low 128-bit lane, splat + blend avoids the
    // extract/insert round trip (AVX2 for non-byte elements, or AVX when a
    // >=32-bit element load can be folded into the splat).
    if (!VT.is128BitVector() && IdxVal >= NumEltsIn128 &&
        ((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
         (Subtarget.hasAVX() && (EltSizeInBits >= 32) && MayFoldLoad(N1)))) {
      SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);
      SmallVector<int, 8> BlendMask;
      for (unsigned i = 0; i != NumElts; ++i)
        BlendMask.push_back(i == IdxVal ? i + NumElts : i);
      return DAG.getVectorShuffle(VT, dl, N0, N1SplatVec, BlendMask);
    }

    // Extract the 128-bit chunk that will receive the element.
    SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);

    // Index within the chunk; NumEltsIn128 is a power of two, so masking
    // replaces the modulo.
    unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);

    V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
                    DAG.getIntPtrConstant(IdxIn128, dl));

    // Put the modified chunk back into the wide vector.
    return insert128BitVector(N0, V, IdxVal, DAG, dl);
  }
  assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");

  // Inserting into lane 0 of an all-zeros vector is just a scalar move with
  // upper-lane zeroing.
  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) {
    if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
        EltVT == MVT::f16 || EltVT == MVT::i64) {
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
      return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
    }

    // i8/i16 cannot be moved into a vector directly; zero-extend to i32
    // first and go through a v4i32-style shuffle, bitcasting back at the end.
    if (EltVT == MVT::i16 || EltVT == MVT::i8) {
      N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, N1);
      MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, N1);
      N1 = getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
      return DAG.getBitcast(VT, N1);
    }
  }

  // Transform to match PINSR{B,W}, which take a GR32 as the inserted value.
  // PINSRB requires SSE4.1.
  if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
    unsigned Opc;
    if (VT == MVT::v8i16) {
      assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW");
      Opc = X86ISD::PINSRW;
    } else {
      assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
      assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB");
      Opc = X86ISD::PINSRB;
    }

    assert(N1.getValueType() != MVT::i32 && "Unexpected VT");
    N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
    N2 = DAG.getTargetConstant(IdxVal, dl, MVT::i8);
    return DAG.getNode(Opc, dl, VT, N0, N1, N2);
  }

  if (Subtarget.hasSSE41()) {
    if (EltVT == MVT::f32) {
      // INSERTPS immediate: bits [7:6] select the source lane (always 0
      // here), bits [5:4] the destination lane, bits [3:0] the zero mask.
      bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
      if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
        // Inserting f32 into lane 0: prefer BLENDI over INSERTPS — except
        // when optimizing for size and the scalar is a foldable load, since
        // INSERTPS can fold the load directly.
        N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
        return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1,
                           DAG.getTargetConstant(1, dl, MVT::i8));
      }

      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
      return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,
                         DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8));
    }

    // PINSRD/PINSRQ with a constant index are matched by patterns; legal
    // as-is.
    if (EltVT == MVT::i32 || EltVT == MVT::i64)
      return Op;
  }

  return SDValue();
}
19449 | |
/// Custom lowering for SCALAR_TO_VECTOR.
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT OpVT = Op.getSimpleValueType();

  // A zero scalar produces a zero vector; materialize that directly.
  if (X86::isZeroNode(Op.getOperand(0)))
    return getZeroVector(OpVT, Subtarget, DAG, dl);

  // For results wider than 128 bits, build the scalar into a 128-bit vector
  // and widen it by inserting into an undef wide vector (the upper bits are
  // undefined anyway).
  if (!OpVT.is128BitVector()) {
    unsigned SizeFactor = OpVT.getSizeInBits() / 128;
    MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
                                 OpVT.getVectorNumElements() / SizeFactor);

    Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));

    // Insert the 128-bit vector into the low part of an undef wide vector.
    return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
  }
  assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 &&
         "Expected an SSE type!");

  // v4i32 (and v8i16 with FP16) SCALAR_TO_VECTOR is handled natively by
  // instruction patterns.
  if (OpVT == MVT::v4i32 || (OpVT == MVT::v8i16 && Subtarget.hasFP16()))
    return Op;

  // Otherwise widen the scalar to i32, build a v4i32, and bitcast to the
  // requested type.
  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
  return DAG.getBitcast(
      OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
19485 | |
19486 | |
19487 | |
19488 | |
/// Custom lowering for INSERT_SUBVECTOR: only vXi1 mask vectors reach this
/// point, and they are delegated to the i1-specific helper.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);

  return insert1BitVector(Op, DAG, Subtarget);
}
19495 | |
19496 | static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, |
19497 | SelectionDAG &DAG) { |
19498 | assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 && |
19499 | "Only vXi1 extract_subvectors need custom lowering"); |
19500 | |
19501 | SDLoc dl(Op); |
19502 | SDValue Vec = Op.getOperand(0); |
19503 | uint64_t IdxVal = Op.getConstantOperandVal(1); |
19504 | |
19505 | if (IdxVal == 0) |
19506 | return Op; |
19507 | |
19508 | MVT VecVT = Vec.getSimpleValueType(); |
19509 | unsigned NumElems = VecVT.getVectorNumElements(); |
19510 | |
19511 | |
19512 | MVT WideVecVT = VecVT; |
19513 | if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { |
19514 | WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
19515 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, |
19516 | DAG.getUNDEF(WideVecVT), Vec, |
19517 | DAG.getIntPtrConstant(0, dl)); |
19518 | } |
19519 | |
19520 | |
19521 | Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, |
19522 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
19523 | |
19524 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, |
19525 | DAG.getIntPtrConstant(0, dl)); |
19526 | } |
19527 | |
19528 | |
19529 | unsigned X86TargetLowering::getGlobalWrapperKind( |
19530 | const GlobalValue *GV, const unsigned char OpFlags) const { |
19531 | |
19532 | if (GV && GV->isAbsoluteSymbolRef()) |
19533 | return X86ISD::Wrapper; |
19534 | |
19535 | CodeModel::Model M = getTargetMachine().getCodeModel(); |
19536 | if (Subtarget.isPICStyleRIPRel() && |
19537 | (M == CodeModel::Small || M == CodeModel::Kernel)) |
19538 | return X86ISD::WrapperRIP; |
19539 | |
19540 | |
19541 | if (OpFlags == X86II::MO_GOTPCREL) |
19542 | return X86ISD::WrapperRIP; |
19543 | |
19544 | return X86ISD::Wrapper; |
19545 | } |
19546 | |
19547 | |
19548 | |
19549 | |
19550 | |
19551 | |
19552 | |
19553 | SDValue |
19554 | X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { |
19555 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); |
19556 | |
19557 | |
19558 | |
19559 | unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); |
19560 | |
19561 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19562 | SDValue Result = DAG.getTargetConstantPool( |
19563 | CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag); |
19564 | SDLoc DL(CP); |
19565 | Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); |
19566 | |
19567 | if (OpFlag) { |
19568 | Result = |
19569 | DAG.getNode(ISD::ADD, DL, PtrVT, |
19570 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); |
19571 | } |
19572 | |
19573 | return Result; |
19574 | } |
19575 | |
19576 | SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { |
19577 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); |
19578 | |
19579 | |
19580 | |
19581 | unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); |
19582 | |
19583 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19584 | SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); |
19585 | SDLoc DL(JT); |
19586 | Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); |
19587 | |
19588 | |
19589 | if (OpFlag) |
19590 | Result = |
19591 | DAG.getNode(ISD::ADD, DL, PtrVT, |
19592 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); |
19593 | |
19594 | return Result; |
19595 | } |
19596 | |
19597 | SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op, |
19598 | SelectionDAG &DAG) const { |
19599 | return LowerGlobalOrExternal(Op, DAG, false); |
19600 | } |
19601 | |
19602 | SDValue |
19603 | X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { |
19604 | |
19605 | unsigned char OpFlags = |
19606 | Subtarget.classifyBlockAddressReference(); |
19607 | const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); |
19608 | int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); |
19609 | SDLoc dl(Op); |
19610 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19611 | SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags); |
19612 | Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result); |
19613 | |
19614 | |
19615 | if (isGlobalRelativeToPICBase(OpFlags)) { |
19616 | Result = DAG.getNode(ISD::ADD, dl, PtrVT, |
19617 | DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); |
19618 | } |
19619 | |
19620 | return Result; |
19621 | } |
19622 | |
19623 | |
19624 | |
/// Create target global address or external symbol nodes, for either a call
/// (ForCall) or a plain address use.
SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                                 bool ForCall) const {
  // Unpack the global address or external symbol.
  const SDLoc &dl = SDLoc(Op);
  const GlobalValue *GV = nullptr;
  int64_t Offset = 0;
  const char *ExternalSym = nullptr;
  if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) {
    GV = G->getGlobal();
    Offset = G->getOffset();
  } else {
    const auto *ES = cast<ExternalSymbolSDNode>(Op);
    ExternalSym = ES->getSymbol();
  }

  // Classify the reference to decide whether we need a PIC base register
  // and/or a load through a GOT/stub.
  const Module &Mod = *DAG.getMachineFunction().getFunction().getParent();
  unsigned char OpFlags;
  if (ForCall)
    OpFlags = Subtarget.classifyGlobalFunctionReference(GV, Mod);
  else
    OpFlags = Subtarget.classifyGlobalReference(GV, Mod);
  bool HasPICReg = isGlobalRelativeToPICBase(OpFlags);
  bool NeedsLoad = isGlobalStubReference(OpFlags);

  CodeModel::Model M = DAG.getTarget().getCodeModel();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result;

  if (GV) {
    // Create a target global address node. If possible, fold the offset into
    // the node itself; otherwise it is applied with an explicit ADD below
    // (after any GOT load, so the addend applies to the loaded address).
    int64_t GlobalOffset = 0;
    if (OpFlags == X86II::MO_NO_FLAG && Offset >= 0 &&
        X86::isOffsetSuitableForCodeModel(Offset, M, true)) {
      std::swap(GlobalOffset, Offset);
    }
    Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GlobalOffset, OpFlags);
  } else {
    // Not a global address; must be an external symbol.
    Result = DAG.getTargetExternalSymbol(ExternalSym, PtrVT, OpFlags);
  }

  // For a direct call that needs no load, PIC add, or offset, skip the
  // wrapper so instruction selection can match the direct-call pattern.
  if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
    return Result;

  Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);

  // With PIC, the address is actually PIC-base + offset.
  if (HasPICReg) {
    Result = DAG.getNode(ISD::ADD, dl, PtrVT,
                         DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
  }

  // For globals that require a load from a stub/GOT slot to get the actual
  // address, emit that load.
  if (NeedsLoad)
    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));

  // Apply any offset that was not folded into the target node above.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result,
                         DAG.getConstant(Offset, dl, PtrVT));

  return Result;
}
19698 | |
19699 | SDValue |
19700 | X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { |
19701 | return LowerGlobalOrExternal(Op, DAG, false); |
19702 | } |
19703 | |
/// Emit a TLSADDR (or TLSBASEADDR, when LocalDynamic) pseudo call for \p GA
/// and return the requested address read out of \p ReturnReg. \p InFlag, when
/// non-null, supplies incoming glue (used after PIC base register setup).
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
           SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
           unsigned char OperandFlags, bool LocalDynamic = false) {
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc dl(GA);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);

  X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
                                           : X86ISD::TLSADDR;

  // Thread incoming glue through the pseudo when the caller provided it.
  if (InFlag) {
    SDValue Ops[] = { Chain, TGA, *InFlag };
    Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
  } else {
    SDValue Ops[] = { Chain, TGA };
    Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
  }

  // The TLSADDR pseudo is codegen'ed as a call; record that in the frame
  // info so stack adjustment happens correctly.
  MFI.setAdjustsStack(true);
  MFI.setHasCalls(true);

  // Read the result out of the fixed return register, glued to the call.
  SDValue Flag = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
19734 | |
19735 | |
19736 | static SDValue |
19737 | LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
19738 | const EVT PtrVT) { |
19739 | SDValue InFlag; |
19740 | SDLoc dl(GA); |
19741 | SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, |
19742 | DAG.getNode(X86ISD::GlobalBaseReg, |
19743 | SDLoc(), PtrVT), InFlag); |
19744 | InFlag = Chain.getValue(1); |
19745 | |
19746 | return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); |
19747 | } |
19748 | |
19749 | |
19750 | static SDValue |
19751 | LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
19752 | const EVT PtrVT) { |
19753 | return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, |
19754 | X86::RAX, X86II::MO_TLSGD); |
19755 | } |
19756 | |
19757 | |
19758 | static SDValue |
19759 | LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
19760 | const EVT PtrVT) { |
19761 | return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, |
19762 | X86::EAX, X86II::MO_TLSGD); |
19763 | } |
19764 | |
/// Lower ISD::GlobalTLSAddress for the "local dynamic" TLS model.
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
                                           SelectionDAG &DAG, const EVT PtrVT,
                                           bool Is64Bit, bool Is64BitLP64) {
  SDLoc dl(GA);

  // Record the access so later passes know this function uses the
  // local-dynamic TLS base.
  X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
                                    .getInfo<X86MachineFunctionInfo>();
  MFI->incNumLocalDynamicTLSAccesses();

  // Get the start address of the TLS block for this module via the
  // TLSBASEADDR pseudo-call.
  SDValue Base;
  if (Is64Bit) {
    // 64-bit: no PIC register setup; the base comes back in RAX (LP64) or
    // EAX (x32).
    unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
    Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
                      X86II::MO_TLSLD, true);
  } else {
    // 32-bit: the PIC base must be in EBX before the call; glue the copy to
    // the call and read the result from EAX.
    SDValue InFlag;
    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
                                     DAG.getNode(X86ISD::GlobalBaseReg,
                                                 SDLoc(), PtrVT), InFlag);
    InFlag = Chain.getValue(1);
    Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
                      X86II::MO_TLSLDM, true);
  }

  // Build x@DTPOFF: the symbol's offset from the module's TLS block base.
  unsigned char OperandFlags = X86II::MO_DTPOFF;
  unsigned WrapperKind = X86ISD::Wrapper;
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(), OperandFlags);
  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);

  // The final address is block base + symbol offset.
  return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
}
19803 | |
19804 | |
/// Lower ISD::GlobalTLSAddress for the "initial exec" or "local exec" TLS
/// models.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                   const EVT PtrVT, TLSModel::Model model,
                                   bool is64Bit, bool isPIC) {
  SDLoc dl(GA);

  // Load the thread pointer: modelled as a load from address 0 in the x86
  // backend's segment address spaces (257 = FS for 64-bit, 256 = GS for
  // 32-bit).
  Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
                                                         is64Bit ? 257 : 256));

  SDValue ThreadPointer =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
                  MachinePointerInfo(Ptr));

  unsigned char OperandFlags = 0;
  // Pick the relocation flavor; only 64-bit initial-exec uses a RIP-relative
  // wrapper.
  unsigned WrapperKind = X86ISD::Wrapper;
  if (model == TLSModel::LocalExec) {
    OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
  } else if (model == TLSModel::InitialExec) {
    if (is64Bit) {
      OperandFlags = X86II::MO_GOTTPOFF;
      WrapperKind = X86ISD::WrapperRIP;
    } else {
      OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
    }
  } else {
    llvm_unreachable("Unexpected model");
  }

  // Emit "sym@<flag>": the symbol's offset from the thread pointer (local
  // exec) or the address of the GOT slot holding that offset (initial exec).
  SDValue TGA =
      DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
                                 GA->getOffset(), OperandFlags);
  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);

  if (model == TLSModel::InitialExec) {
    // 32-bit PIC: the GOT slot address is relative to the PIC base register.
    if (isPIC && !is64Bit) {
      Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
                           DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
                           Offset);
    }

    // Load the actual offset out of the GOT.
    Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // The thread-local variable's address is thread pointer + offset.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
19858 | |
// Lower ISD::GlobalTLSAddress for the current target OS: ELF (all four TLS
// models), Darwin (TLVP call), and Windows (implicit TLS via the TEB), plus
// the target-independent emulated-TLS fallback.
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {

  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  const GlobalValue *GV = GA->getGlobal();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  bool PositionIndependent = isPositionIndependent();

  if (Subtarget.isTargetELF()) {
    TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
    switch (model) {
    case TLSModel::GeneralDynamic:
      if (Subtarget.is64Bit()) {
        if (Subtarget.isTarget64BitLP64())
          return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
        return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
      }
      return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
    case TLSModel::LocalDynamic:
      return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
                                         Subtarget.isTarget64BitLP64());
    case TLSModel::InitialExec:
    case TLSModel::LocalExec:
      return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
                                 PositionIndependent);
    }
    llvm_unreachable("Unknown TLS model.");
  }

  if (Subtarget.isTargetDarwin()) {
    // Darwin only has one model of TLS.  Lower to that.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ?
                           X86ISD::WrapperRIP : X86ISD::Wrapper;

    // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
    // global base reg.
    bool PIC32 = PositionIndependent && !Subtarget.is64Bit();
    if (PIC32)
      OpFlag = X86II::MO_TLVP_PIC_BASE;
    else
      OpFlag = X86II::MO_TLVP;
    SDLoc DL(Op);
    SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                                GA->getValueType(0),
                                                GA->getOffset(), OpFlag);
    SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);

    // With PIC32 mode, the address is actually $g + Offset.
    if (PIC32)
      Offset = DAG.getNode(ISD::ADD, DL, PtrVT,
                           DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
                           Offset);

    // Lowering this into a call to the tlv_get_addr stub: wrap the TLSCALL
    // in a callseq so the register allocator / frame lowering treat it as a
    // real call.
    SDValue Chain = DAG.getEntryNode();
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
    SDValue Args[] = { Chain, Offset };
    Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                               DAG.getIntPtrConstant(0, DL, true),
                               Chain.getValue(1), DL);

    // TLSCALL will be codegen'ed as a call. Inform MFI that this function
    // adjusts the stack.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    MFI.setAdjustsStack(true);

    // And our return value (tls address) is in the standard call return value
    // location.
    unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
    return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
  }

  if (Subtarget.isOSWindows()) {
    // Just use the implicit TLS architecture.
    // Need to generate something similar to:
    //   mov     rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
    //                                  ; from TEB
    //   mov     ecx, dword [rel _tls_index]: Load index (from C runtime)
    //   mov     rcx, qword [rdx+rcx*8]
    //   mov     eax, .tls$:tlsvar
    //   [rax+rcx] contains the address
    //   ...

    SDLoc dl(GA);
    SDValue Chain = DAG.getEntryNode();

    // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
    // %gs:0x58 (64-bit). On 64-bit the GS segment is address space 256,
    // on 32-bit the FS segment is address space 257.
    Value *Ptr = Constant::getNullValue(Subtarget.is64Bit()
                                        ? Type::getInt8PtrTy(*DAG.getContext(),
                                                             256)
                                        : Type::getInt32PtrTy(*DAG.getContext(),
                                                              257));

    // MinGW (WindowsGNU) has no _tls_array external symbol, so use the fixed
    // 0x2C TEB offset there instead.
    SDValue TlsArray = Subtarget.is64Bit()
                           ? DAG.getIntPtrConstant(0x58, dl)
                           : (Subtarget.isTargetWindowsGNU()
                                  ? DAG.getIntPtrConstant(0x2C, dl)
                                  : DAG.getExternalSymbol("_tls_array", PtrVT));

    SDValue ThreadPointer =
        DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr));

    SDValue res;
    if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {
      res = ThreadPointer;
    } else {
      // Load the _tls_index variable.
      SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
      if (Subtarget.is64Bit())
        IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
                             MachinePointerInfo(), MVT::i32);
      else
        IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo());

      // Index into the TLS slot array: ThreadPointer + IDX * sizeof(void*).
      const DataLayout &DL = DAG.getDataLayout();
      SDValue Scale =
          DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, MVT::i8);
      IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale);

      res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX);
    }

    res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo());

    // Get the offset of the variable within the .tls section (@SECREL32).
    SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                             GA->getValueType(0),
                                             GA->getOffset(), X86II::MO_SECREL);
    SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);

    // The address of the thread local variable is the add of the thread
    // pointer with the offset of the variable.
    return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset);
  }

  llvm_unreachable("TLS not implemented for this target.");
}
20006 | |
20007 | |
20008 | |
20009 | |
20010 | static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { |
20011 | SDValue Lo, Hi; |
20012 | DAG.getTargetLoweringInfo().expandShiftParts(Op.getNode(), Lo, Hi, DAG); |
20013 | return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); |
20014 | } |
20015 | |
// Lower ISD::FSHL/ISD::FSHR. Vector types are handled with VBMI2's
// VSHLD/VSHRD (widening to 512 bits when VLX is unavailable); scalar types
// either map to SHLD/SHRD, get expanded manually for i8 (and i16 on targets
// with slow SHLD), or are returned as-is when already legal.
static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
         "Unexpected funnel shift opcode!");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);

  bool IsFSHR = Op.getOpcode() == ISD::FSHR;

  if (VT.isVector()) {
    assert(Subtarget.hasVBMI2() && "Expected VBMI2");

    // VSHRD takes its operands in the opposite order to FSHR.
    if (IsFSHR)
      std::swap(Op0, Op1);

    // Without VLX the VBMI2 instructions only exist in 512-bit form, so
    // widen narrower vectors to 512 bits.
    if (!Subtarget.hasVLX() && !VT.is512BitVector()) {
      Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
      Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512);
    }

    SDValue Funnel;
    APInt APIntShiftAmt;
    MVT ResultVT = Op0.getSimpleValueType();
    if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
      // Uniform constant amount: use the immediate form (amount taken
      // modulo the element size).
      uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
      Funnel =
          DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0,
                      Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
    } else {
      // Per-element variable amounts: use the variable (V) form.
      if (!Subtarget.hasVLX() && !VT.is512BitVector())
        Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512);
      Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL,
                           ResultVT, Op0, Op1, Amt);
    }
    // If we widened above, extract the original-width result back out.
    if (!Subtarget.hasVLX() && !VT.is512BitVector())
      Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits());
    return Funnel;
  }
  assert(
      (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
      "Unexpected funnel shift type!");

  // Expand slow SHLD/SHRD cases if we are not optimizing for size.
  bool OptForSize = DAG.shouldOptForSize();
  bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();

  // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
  // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
  if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
      !isa<ConstantSDNode>(Amt)) {
    unsigned EltSizeInBits = VT.getScalarSizeInBits();
    SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
    SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
    Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
    Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
    Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
    SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res /* placeholder */, HiShift);
    Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
    if (IsFSHR) {
      Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
    } else {
      Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
    }
    return DAG.getZExtOrTrunc(Res, DL, VT);
  }

  if (VT == MVT::i8 || ExpandFunnel)
    return SDValue();

  // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
  if (VT == MVT::i16) {
    Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
                      DAG.getConstant(15, DL, Amt.getValueType()));
    unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
    return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
  }

  // i32/i64 map directly to SHLD/SHRD; leave the node for isel.
  return Op;
}
20101 | |
20102 | |
20103 | |
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled: put the scalar in a vector, convert with the packed
// form, and extract element 0 of the result.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
          Op.getOpcode() == ISD::UINT_TO_FP) &&
         "Unexpected opcode!");
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Op.getOperand(OpNo);
  MVT SrcVT = Src.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();

  // Only applies to i64 sources on 32-bit targets with DQI, producing
  // f32/f64; everything else falls through to the default lowering.
  if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
      (VT != MVT::f32 && VT != MVT::f64))
    return SDValue();

  // Pack the i64 into a vector, do the operation and extract.

  // Without VLX, the only legal vector widths are 512-bit, so use 8 elements
  // there; with VLX a 256-bit (4 x i64) op suffices.
  unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
  MVT VecInVT = MVT::getVectorVT(MVT::i64, NumElts);
  MVT VecVT = MVT::getVectorVT(VT, NumElts);

  SDLoc dl(Op);
  SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
  if (IsStrict) {
    // Strict FP: preserve the chain through the vector conversion.
    SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
                                 {Op.getOperand(0), InVec});
    SDValue Chain = CvtVec.getValue(1);
    SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                                DAG.getIntPtrConstant(0, dl));
    return DAG.getMergeValues({Value, Chain}, dl);
  }

  SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                     DAG.getIntPtrConstant(0, dl));
}
20144 | |
20145 | |
// Try to use a packed vector operation to handle i64 -> f16 on 32-bit
// targets with FP16: same scalar-in-vector trick as the AVX512DQ path above,
// using a v2i64 -> v2f16 conversion and extracting element 0.
static SDValue LowerI64IntToFP16(SDValue Op, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
          Op.getOpcode() == ISD::UINT_TO_FP) &&
         "Unexpected opcode!");
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();

  if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
    return SDValue();

  // Pack the i64 into a vector, do the operation and extract.

  assert(Subtarget.hasFP16() && "Expected FP16");

  SDLoc dl(Op);
  SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
  if (IsStrict) {
    // Strict FP: preserve the chain through the vector conversion.
    SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
                                 {Op.getOperand(0), InVec});
    SDValue Chain = CvtVec.getValue(1);
    SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                                DAG.getIntPtrConstant(0, dl));
    return DAG.getMergeValues({Value, Chain}, dl);
  }

  SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                     DAG.getIntPtrConstant(0, dl));
}
20181 | |
20182 | static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT, |
20183 | const X86Subtarget &Subtarget) { |
20184 | switch (Opcode) { |
20185 | case ISD::SINT_TO_FP: |
20186 | |
20187 | if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32) |
20188 | return false; |
20189 | |
20190 | return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64); |
20191 | |
20192 | case ISD::UINT_TO_FP: |
20193 | |
20194 | if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32) |
20195 | return false; |
20196 | |
20197 | return ToVT == MVT::v4f32 || ToVT == MVT::v4f64; |
20198 | |
20199 | default: |
20200 | return false; |
20201 | } |
20202 | } |
20203 | |
20204 | |
20205 | |
20206 | |
/// Given a scalar cast operation that is extracted from a vector, try to
/// vectorize the cast op followed by extraction. This will avoid an expensive
/// round-trip between XMM and GPR.
static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  // The pattern we look for is: cast (extract_vector_elt V, C).
  SDValue Extract = Cast.getOperand(0);
  MVT DestVT = Cast.getSimpleValueType();
  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(Extract.getOperand(1)))
    return SDValue();

  // See if we have a 128-bit vector cast op for this type of cast.
  SDValue VecOp = Extract.getOperand(0);
  MVT FromVT = VecOp.getSimpleValueType();
  unsigned NumEltsInXMM = 128 / FromVT.getScalarSizeInBits();
  MVT Vec128VT = MVT::getVectorVT(FromVT.getScalarType(), NumEltsInXMM);
  MVT ToVT = MVT::getVectorVT(DestVT, NumEltsInXMM);
  if (!useVectorCast(Cast.getOpcode(), Vec128VT, ToVT, Subtarget))
    return SDValue();

  // If we are extracting from a non-zero element, first shuffle the source
  // vector to allow extracting the cast result from element zero.
  SDLoc DL(Cast);
  if (!isNullConstant(Extract.getOperand(1))) {
    SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1);
    Mask[0] = Extract.getConstantOperandVal(1);
    VecOp = DAG.getVectorShuffle(FromVT, DL, VecOp, DAG.getUNDEF(FromVT), Mask);
  }

  // If the source vector is wider than 128-bits, extract the low part. Do not
  // create an unnecessarily wide vector cast op.
  if (FromVT != Vec128VT)
    VecOp = extract128BitVector(VecOp, 0, DAG, DL);

  // cast (extelt V, 0) --> extelt (cast (extract subv V)), 0
  // cast (extelt V, C) --> extelt (cast (extract subv (shuffle V, [C...]))), 0
  SDValue VCast = DAG.getNode(Cast.getOpcode(), DL, ToVT, VecOp);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestVT, VCast,
                     DAG.getIntPtrConstant(0, DL));
}
20245 | |
20246 | |
20247 | |
20248 | |
/// Given a scalar cast to FP with a cast to integer operand (almost an ftrunc),
/// try to vectorize the cast ops. This will avoid an expensive round-trip
/// between XMM and GPR.
static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  // TODO: Allow FP_TO_UINT / unsigned sources as well.
  SDValue CastToInt = CastToFP.getOperand(0);
  MVT VT = CastToFP.getSimpleValueType();
  if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
    return SDValue();

  MVT IntVT = CastToInt.getSimpleValueType();
  SDValue X = CastToInt.getOperand(0);
  MVT SrcVT = X.getSimpleValueType();
  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return SDValue();

  // See if we have 128-bit vector cast instructions for this type of cast.
  // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
  if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
      IntVT != MVT::i32)
    return SDValue();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned IntSize = IntVT.getSizeInBits();
  unsigned VTSize = VT.getSizeInBits();
  MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
  MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
  MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);

  // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
  unsigned ToIntOpcode =
      SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
  unsigned ToFPOpcode =
      IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;

  // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
  //
  // We are not defining the high elements (for example, zero them) because
  // that could nullify any performance advantage that we hoped to gain from
  // this vector op hack. We do not expect any adverse effects (like denorm
  // penalties) with cast ops.
  SDLoc DL(CastToFP);
  SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
  SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, X);
  SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
  SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecVT, VCastToInt);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
}
20295 | |
// Lower vXi64 -> vXfp conversions. With AVX512DQ (but no VLX) widen to the
// 512-bit form; otherwise handle the unsigned v4i64 -> v4f32 case with a
// halve-convert-double trick per element.
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  SDLoc DL(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  MVT VT = Op->getSimpleValueType(0);
  SDValue Src = Op->getOperand(IsStrict ? 1 : 0);

  if (Subtarget.hasDQI()) {
    assert(!Subtarget.hasVLX() && "Unexpected features");

    assert((Src.getSimpleValueType() == MVT::v2i64 ||
            Src.getSimpleValueType() == MVT::v4i64) &&
           "Unsupported custom type");

    // With AVX512DQ, but not VLX we need to widen to get a 512-bit result
    // type.
    assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
           "Unexpected VT!");
    MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;

    // Need to concat with zero vector for strict fp to avoid spurious
    // exceptions from the upper (undefined) elements.
    SDValue Tmp = IsStrict ? DAG.getConstant(0, DL, MVT::v8i64)
                           : DAG.getUNDEF(MVT::v8i64);
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src,
                      DAG.getIntPtrConstant(0, DL));
    SDValue Res, Chain;
    if (IsStrict) {
      Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other},
                        {Op->getOperand(0), Src});
      Chain = Res.getValue(1);
    } else {
      Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src);
    }

    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                      DAG.getIntPtrConstant(0, DL));

    if (IsStrict)
      return DAG.getMergeValues({Res, Chain}, DL);
    return Res;
  }

  // Without DQI, only the unsigned v4i64 -> v4f32 case is custom-lowered
  // here.
  bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP ||
                  Op->getOpcode() == ISD::STRICT_SINT_TO_FP;
  if (VT != MVT::v4f32 || IsSigned)
    return SDValue();

  // For elements with the sign bit set, halve the value (keeping the low bit
  // in the halved value so the final rounding is still correct), convert as
  // signed, and double the result; other elements convert directly.
  SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
  SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
  SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
                             DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One),
                             DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One));
  SDValue IsNeg = DAG.getSetCC(DL, MVT::v4i64, Src, Zero, ISD::SETLT);
  SDValue SignSrc = DAG.getSelect(DL, MVT::v4i64, IsNeg, Sign, Src);
  SmallVector<SDValue, 4> SignCvts(4);
  SmallVector<SDValue, 4> Chains(4);
  // There is no packed i64 -> f32 conversion without DQI, so convert each
  // element with the scalar instruction.
  for (int i = 0; i != 4; ++i) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
                              DAG.getIntPtrConstant(i, DL));
    if (IsStrict) {
      SignCvts[i] =
          DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {MVT::f32, MVT::Other},
                      {Op.getOperand(0), Elt});
      Chains[i] = SignCvts[i].getValue(1);
    } else {
      SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Elt);
    }
  }
  SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);

  // Double the halved conversions (the "slow" path for negative elements).
  SDValue Slow, Chain;
  if (IsStrict) {
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
    Slow = DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v4f32, MVT::Other},
                       {Chain, SignCvt, SignCvt});
    Chain = Slow.getValue(1);
  } else {
    Slow = DAG.getNode(ISD::FADD, DL, MVT::v4f32, SignCvt, SignCvt);
  }

  // Select the doubled value only where the sign bit was set.
  IsNeg = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i32, IsNeg);
  SDValue Cvt = DAG.getSelect(DL, MVT::v4f32, IsNeg, Slow, SignCvt);

  if (IsStrict)
    return DAG.getMergeValues({Cvt, Chain}, DL);

  return Cvt;
}
20384 | |
// Lower (STRICT_)SINT_TO_FP: try the extracted-cast / int-round-trip vector
// tricks first, handle the custom vector cases, promote i16, and fall back
// to storing the integer and converting with x87 FILD.
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Op.getOperand(OpNo);
  SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
  MVT SrcVT = Src.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);

  if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
    return Extract;

  if (SDValue R = lowerFPToIntToFP(Op, DAG, Subtarget))
    return R;

  if (SrcVT.isVector()) {
    if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
      // Note: Since v2f64 is a legal type we don't need to zero extend the
      // low elements in zero extend. That will be handled by the consumer.
      if (IsStrict)
        return DAG.getNode(
            X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
            {Chain, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
                                DAG.getUNDEF(SrcVT))});
      return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
                         DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
                                     DAG.getUNDEF(SrcVT)));
    }
    if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64)
      return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);

    return SDValue();
  }

  assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  bool UseSSEReg = isScalarFPTypeInSSEReg(VT);

  // These are really Legal; return the operand so the caller accepts it as
  // Legal.
  if (SrcVT == MVT::i32 && UseSSEReg)
    return Op;
  if (SrcVT == MVT::i64 && UseSSEReg && Subtarget.is64Bit())
    return Op;

  if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
    return V;
  if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
    return V;

  // SSE doesn't have an i16 conversion so we need to promote.
  if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                         {Chain, Ext});

    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
  }

  if (VT == MVT::f128)
    return SDValue();

  SDValue ValueToStore = Src;
  if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit())
    // Bitcasting to f64 here allows us to do a single 64-bit store from
    // an SSE register, avoiding the store forwarding penalty that would come
    // with two 32-bit stores.
    ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);

  // Spill the integer to a stack slot and convert with x87 FILD.
  unsigned Size = SrcVT.getStoreSize();
  Align Alignment(Size);
  MachineFunction &MF = DAG.getMachineFunction();
  auto PtrVT = getPointerTy(MF.getDataLayout());
  int SSFI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false);
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment);
  std::pair<SDValue, SDValue> Tmp =
      BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG);

  if (IsStrict)
    return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);

  return Tmp.first;
}
20474 | |
// Build an X86ISD::FILD node loading an integer of type SrcVT from the given
// pointer and converting it to DstVT. When the destination type is held in
// SSE registers, the x87 value (f80) is stored back to a stack slot and
// reloaded to move it into the SSE register class. Returns {result, chain}.
std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(
    EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer,
    MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const {
  // Build the FILD.
  SDVTList Tys;
  bool useSSE = isScalarFPTypeInSSEReg(DstVT);
  if (useSSE)
    // x87 computes at f80 precision; we will round via the store below.
    Tys = DAG.getVTList(MVT::f80, MVT::Other);
  else
    Tys = DAG.getVTList(DstVT, MVT::Other);

  SDValue FILDOps[] = {Chain, Pointer};
  SDValue Result =
      DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, PtrInfo,
                              Alignment, MachineMemOperand::MOLoad);
  Chain = Result.getValue(1);

  if (useSSE) {
    // Round-trip the f80 result through memory at DstVT to move it into an
    // SSE register.
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned SSFISize = DstVT.getStoreSize();
    int SSFI =
        MF.getFrameInfo().CreateStackObject(SSFISize, Align(SSFISize), false);
    auto PtrVT = getPointerTy(MF.getDataLayout());
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Tys = DAG.getVTList(MVT::Other);
    SDValue FSTOps = {Chain, Result, StackSlot};
    MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
        MachineMemOperand::MOStore, SSFISize, Align(SSFISize));

    Chain =
        DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps, DstVT, StoreMMO);
    Result = DAG.getLoad(
        DstVT, DL, Chain, StackSlot,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
    Chain = Result.getValue(1);
  }

  return { Result, Chain };
}
20515 | |
20516 | |
20517 | |
20518 | |
20519 | static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, |
20520 | const X86Subtarget &Subtarget) { |
20521 | bool IsOptimizingSize = DAG.shouldOptForSize(); |
20522 | bool HasFastHOps = Subtarget.hasFastHorizontalOps(); |
20523 | return !IsSingleSource || IsOptimizingSize || HasFastHOps; |
20524 | } |
20525 | |
20526 | |
/// 64-bit unsigned integer to double expansion.
static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  // This is only used for the non-strict form; constrained FP takes a
  // different path.
  assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!");

  // This algorithm is not obvious. Here is what we're trying to output:
  /*
     movq       %rax,  %xmm0
     punpckldq  (c0),  %xmm0  // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
     subpd      (c1),  %xmm0  // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
     #ifdef __SSE3__
       haddpd   %xmm0, %xmm0
     #else
       pshufd   $0x4e, %xmm0, %xmm1
       addpd    %xmm1, %xmm0
     #endif
  */

  SDLoc dl(Op);
  LLVMContext *Context = DAG.getContext();

  // Build some magic constants.
  static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
  Constant *C0 = ConstantDataVector::get(*Context, CV0);
  auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16));

  SmallVector<Constant*,2> CV1;
  CV1.push_back(
    ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
                                      APInt(64, 0x4330000000000000ULL))));
  CV1.push_back(
    ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
                                      APInt(64, 0x4530000000000000ULL))));
  Constant *C1 = ConstantVector::get(CV1);
  SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16));

  // Load the 64-bit value into an XMM register and interleave it with the
  // exponent words so the two 32-bit halves become the mantissas of two
  // biased doubles.
  SDValue XR1 =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0));
  SDValue CLod0 = DAG.getLoad(
      MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
  SDValue Unpck1 =
      getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);

  SDValue CLod1 = DAG.getLoad(
      MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
  SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);

  // Subtract the biases to recover the lo/hi contributions, then add them.
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
  SDValue Result;

  if (Subtarget.hasSSE3() &&
      shouldUseHorizontalOp(true, DAG, Subtarget)) {
    Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
  } else {
    SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
    Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
  }
  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
                       DAG.getIntPtrConstant(0, dl));
  return Result;
}
20593 | |
20594 | |
/// 32-bit unsigned integer to float expansion: build the double
/// 2^52 + x by OR-ing the integer into the mantissa of a bias constant,
/// subtract the bias, then round to the destination type.
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
  SDLoc dl(Op);

  // FP constant to bias correct the final result: 2^52 as a double.
  SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
                                   MVT::f64);

  // Load the 32-bit value into an XMM register.
  SDValue Load =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));

  // Zero out the upper parts of the register.
  Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);

  // Or the load with the bias: the result is exactly 2^52 + x.
  SDValue Or = DAG.getNode(
      ISD::OR, dl, MVT::v2i64,
      DAG.getBitcast(MVT::v2i64, Load),
      DAG.getBitcast(MVT::v2i64,
                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
  Or =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                  DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));

  if (Op.getNode()->isStrictFPOpcode()) {
    // Subtract the bias: the difference is the exact integer value as f64.
    // TODO: Are there any fast-math-flags to propagate here?
    SDValue Chain = Op.getOperand(0);
    SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
                              {Chain, Or, Bias});

    if (Op.getValueType() == Sub.getValueType())
      return Sub;

    // Handle final rounding to the requested type.
    std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
        Sub, Sub.getValue(1), dl, Op.getSimpleValueType());

    return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
  }

  // Subtract the bias.
  // TODO: Are there any fast-math-flags to propagate here?
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);

  // Handle final rounding.
  return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType());
}
20644 | |
// Lower v2i32 -> v2f64 uint_to_fp. With AVX-512 use the native unsigned
// conversion (widening when VLX is unavailable); otherwise use the
// 2^52-mantissa bias trick in vector form.
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget,
                                     const SDLoc &DL) {
  if (Op.getSimpleValueType() != MVT::v2f64)
    return SDValue();

  bool IsStrict = Op->isStrictFPOpcode();

  SDValue N0 = Op.getOperand(IsStrict ? 1 : 0);
  assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");

  if (Subtarget.hasAVX512()) {
    if (!Subtarget.hasVLX()) {
      // Let generic type legalization widen this if we don't have VLX.
      if (!IsStrict)
        return SDValue();

      // The strict version needs to avoid spurious exceptions from the
      // garbage upper elements, so pad with zeros and widen to v4f64.
      N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
                       DAG.getConstant(0, DL, MVT::v2i32));
      SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other},
                                {Op.getOperand(0), N0});
      SDValue Chain = Res.getValue(1);
      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2f64, Res,
                        DAG.getIntPtrConstant(0, DL));
      return DAG.getMergeValues({Res, Chain}, DL);
    }

    // Legalize to v4i32 type; upper elements don't matter for the
    // non-strict target-specific node.
    N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
                     DAG.getUNDEF(MVT::v2i32));
    if (IsStrict)
      return DAG.getNode(X86ISD::STRICT_CVTUI2P, DL, {MVT::v2f64, MVT::Other},
                         {Op.getOperand(0), N0});
    return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
  }

  // Zero extend to 2i64, OR with the floating point representation of 2^52.
  // This gives us the floating point equivalent of 2^52 + the i32 integer
  // since double has 52-bits of mantissa. Then subtract 2^52 in floating
  // point to get the integer value.
  SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
  SDValue VBias =
      DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64);
  SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
                           DAG.getBitcast(MVT::v2i64, VBias));
  Or = DAG.getBitcast(MVT::v2f64, Or);

  if (IsStrict)
    return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other},
                       {Op.getOperand(0), Or, VBias});
  return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias);
}
20697 | |
// Lower (STRICT_)UINT_TO_FP of a v4i32/v8i32 source. Three strategies:
// AVX512 widening, an AVX 2^52-bias trick for v4i32->v4f64, and a generic
// SSE2 two-halves float trick for i32->f32.
20698 | static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, |
20699 | const X86Subtarget &Subtarget) { |
20700 | SDLoc DL(Op); |
20701 | bool IsStrict = Op->isStrictFPOpcode(); |
20702 | SDValue V = Op->getOperand(IsStrict ? 1 : 0); |
20703 | MVT VecIntVT = V.getSimpleValueType(); |
20704 | assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) && |
20705 | "Unsupported custom type"); |
20706 | |
20707 | if (Subtarget.hasAVX512()) { |
20708 | |
// With VLX this node would already be legal, so only the no-VLX case
// reaches here.
20709 | assert(!Subtarget.hasVLX() && "Unexpected features"); |
20710 | MVT VT = Op->getSimpleValueType(0); |
20711 | |
20712 | |
// v8i32->v8f64 is legal as-is on AVX512.
20713 | if (VT == MVT::v8f64) |
20714 | return Op; |
20715 | |
20716 | assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) && |
20717 | "Unexpected VT!"); |
20718 | MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; |
20719 | MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; |
20720 | |
20721 | |
// Widen to a 512-bit operation. For strict FP, pad with zeros instead of
// undef so the extra lanes cannot raise spurious FP exceptions.
20722 | SDValue Tmp = |
20723 | IsStrict ? DAG.getConstant(0, DL, WideIntVT) : DAG.getUNDEF(WideIntVT); |
20724 | V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideIntVT, Tmp, V, |
20725 | DAG.getIntPtrConstant(0, DL)); |
20726 | SDValue Res, Chain; |
20727 | if (IsStrict) { |
20728 | Res = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {WideVT, MVT::Other}, |
20729 | {Op->getOperand(0), V}); |
20730 | Chain = Res.getValue(1); |
20731 | } else { |
20732 | Res = DAG.getNode(ISD::UINT_TO_FP, DL, WideVT, V); |
20733 | } |
20734 | |
// Narrow back to the requested result width.
20735 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
20736 | DAG.getIntPtrConstant(0, DL)); |
20737 | |
20738 | if (IsStrict) |
20739 | return DAG.getMergeValues({Res, Chain}, DL); |
20740 | return Res; |
20741 | } |
20742 | |
// AVX v4i32->v4f64: 2^52 bias trick. 0x4330000000000000 is the bit pattern
// of the double 2^52; each zero-extended i32 is OR'd into its mantissa and
// the bias is subtracted back out, yielding an exact conversion. The bias
// is materialized via a broadcast load from the constant pool.
20743 | if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 && |
20744 | Op->getSimpleValueType(0) == MVT::v4f64) { |
20745 | SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V); |
20746 | Constant *Bias = ConstantFP::get( |
20747 | *DAG.getContext(), |
20748 | APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL))); |
20749 | auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
20750 | SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, Align(8)); |
20751 | SDVTList Tys = DAG.getVTList(MVT::v4f64, MVT::Other); |
20752 | SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; |
20753 | SDValue VBias = DAG.getMemIntrinsicNode( |
20754 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, |
20755 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(8), |
20756 | MachineMemOperand::MOLoad); |
20757 | |
20758 | SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn, |
20759 | DAG.getBitcast(MVT::v4i64, VBias)); |
20760 | Or = DAG.getBitcast(MVT::v4f64, Or); |
20761 | |
20762 | if (IsStrict) |
20763 | return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v4f64, MVT::Other}, |
20764 | {Op.getOperand(0), Or, VBias}); |
20765 | return DAG.getNode(ISD::FSUB, DL, MVT::v4f64, Or, VBias); |
20766 | } |
20767 | |
20768 | |
20769 | |
20770 | |
20771 | |
20772 | |
20773 | |
20774 | |
20775 | |
20776 | |
20777 | |
20778 | |
20779 | |
// Generic SSE2 path for i32->f32: split each 32-bit value into its low and
// high 16-bit halves and encode each half into the mantissa of a float
// whose exponent positions it correctly (2^23 for the low half, 2^39 for
// the high half), then recombine with FP arithmetic.
20780 | bool Is128 = VecIntVT == MVT::v4i32; |
20781 | MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; |
20782 | |
20783 | |
// This path only produces same-width f32 vectors; bail otherwise.
20784 | if (VecFloatVT != Op->getSimpleValueType(0)) |
20785 | return SDValue(); |
20786 | |
20787 | |
20788 | |
20789 | |
20790 | |
20791 | |
20792 | |
20793 | |
20794 | |
// 0x4b000000 is the bit pattern of the float 2^23; 0x53000000 is 2^39.
20795 | SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT); |
20796 | |
20797 | SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT); |
20798 | |
20799 | |
// High 16 bits of each element: v >> 16.
20800 | SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT); |
20801 | SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift); |
20802 | |
20803 | SDValue Low, High; |
20804 | if (Subtarget.hasSSE41()) { |
// With SSE4.1, use PBLENDW (mask 0xaa picks the odd i16 lanes, i.e. the
// upper half of every i32) to merge the exponent constants into the
// values, avoiding the AND/OR sequence below.
20805 | MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; |
20806 | |
20807 | SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); |
20808 | SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); |
20809 | |
20810 | |
// Low = (v & 0xffff) | 0x4b000000, via blend.
20811 | Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, |
20812 | VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); |
20813 | |
20814 | |
20815 | SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); |
20816 | SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift); |
20817 | |
20818 | |
// High = (v >> 16) | 0x53000000, via blend.
20819 | High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, |
20820 | VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); |
20821 | } else { |
// Plain SSE2: same values built with AND/OR.
20822 | SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT); |
20823 | |
20824 | SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask); |
20825 | Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow); |
20826 | |
20827 | |
// The shift already cleared the top bits, so no mask is needed here.
20828 | High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh); |
20829 | } |
20830 | |
20831 | |
// 0x53000080 is the float 2^39 + 2^23: the combined bias that both halves
// carry, removed with a single subtract from the high part.
20832 | SDValue VecCstFSub = DAG.getConstantFP( |
20833 | APFloat(APFloat::IEEEsingle(), APInt(32, 0x53000080)), DL, VecFloatVT); |
20834 | |
20835 | |
20836 | |
20837 | |
20838 | |
// Reinterpret both integer patterns as floats.
20839 | SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); |
20840 | |
20841 | |
20842 | SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); |
20843 | |
// result = (high - (2^39 + 2^23)) + low; chained for strict FP.
20844 | if (IsStrict) { |
20845 | SDValue FHigh = DAG.getNode(ISD::STRICT_FSUB, DL, {VecFloatVT, MVT::Other}, |
20846 | {Op.getOperand(0), HighBitcast, VecCstFSub}); |
20847 | return DAG.getNode(ISD::STRICT_FADD, DL, {VecFloatVT, MVT::Other}, |
20848 | {FHigh.getValue(1), LowBitcast, FHigh}); |
20849 | } |
20850 | |
20851 | SDValue FHigh = |
20852 | DAG.getNode(ISD::FSUB, DL, VecFloatVT, HighBitcast, VecCstFSub); |
20853 | return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); |
20854 | } |
20855 | |
20856 | static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, |
20857 | const X86Subtarget &Subtarget) { |
20858 | unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0; |
20859 | SDValue N0 = Op.getOperand(OpNo); |
20860 | MVT SrcVT = N0.getSimpleValueType(); |
20861 | SDLoc dl(Op); |
20862 | |
20863 | switch (SrcVT.SimpleTy) { |
20864 | default: |
20865 | llvm_unreachable("Custom UINT_TO_FP is not supported!"); |
20866 | case MVT::v2i32: |
20867 | return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl); |
20868 | case MVT::v4i32: |
20869 | case MVT::v8i32: |
20870 | return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget); |
20871 | case MVT::v2i64: |
20872 | case MVT::v4i64: |
20873 | return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget); |
20874 | } |
20875 | } |
20876 | |
// Custom lowering for scalar and vector (STRICT_)UINT_TO_FP. Vector cases
// are delegated to lowerUINT_TO_FP_vec; scalar cases try progressively more
// specific strategies and finally fall back to an x87 FILD plus a
// sign-compensation fudge factor.
20877 | SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, |
20878 | SelectionDAG &DAG) const { |
20879 | bool IsStrict = Op->isStrictFPOpcode(); |
20880 | unsigned OpNo = IsStrict ? 1 : 0; |
20881 | SDValue Src = Op.getOperand(OpNo); |
20882 | SDLoc dl(Op); |
20883 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
20884 | MVT SrcVT = Src.getSimpleValueType(); |
20885 | MVT DstVT = Op->getSimpleValueType(0); |
20886 | SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); |
20887 | |
// f128 results are handled elsewhere (libcall); leave untouched.
20888 | if (DstVT == MVT::f128) |
20889 | return SDValue(); |
20890 | |
20891 | if (DstVT.isVector()) |
20892 | return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); |
20893 | |
20894 | if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget)) |
20895 | return Extract; |
20896 | |
// AVX512 has native unsigned scalar converts for these cases; keep the
// node as-is and let isel match it.
20897 | if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && |
20898 | (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) { |
20899 | |
20900 | |
20901 | return Op; |
20902 | } |
20903 | |
20904 | |
// On 64-bit targets a u32 fits losslessly in an i64, so zero-extend and
// use the signed conversion instead.
20905 | if (SrcVT == MVT::i32 && Subtarget.is64Bit()) { |
20906 | Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src); |
20907 | if (IsStrict) |
20908 | return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other}, |
20909 | {Chain, Src}); |
20910 | return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); |
20911 | } |
20912 | |
20913 | if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) |
20914 | return V; |
20915 | if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget)) |
20916 | return V; |
20917 | |
20918 | |
20919 | |
// SSE2 scalar special cases (non-strict i64->f64, and i32 when the result
// is not f80); i64->f32/f64 on 64-bit targets is expanded by generic code.
20920 | if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64 && !IsStrict) |
20921 | return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); |
20922 | if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80) |
20923 | return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); |
20924 | if (Subtarget.is64Bit() && SrcVT == MVT::i64 && |
20925 | (DstVT == MVT::f32 || DstVT == MVT::f64)) |
20926 | return SDValue(); |
20927 | |
20928 | |
// x87 fallback: spill the value as an i64 to the stack and FILD it.
20929 | SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8); |
20930 | int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); |
20931 | Align SlotAlign(8); |
20932 | MachinePointerInfo MPI = |
20933 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI); |
20934 | if (SrcVT == MVT::i32) { |
// For a u32 source, store it in the low half and zero the high half so
// the 64-bit FILD reads the unsigned value correctly.
20935 | SDValue OffsetSlot = |
20936 | DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl); |
20937 | SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign); |
20938 | SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32), |
20939 | OffsetSlot, MPI.getWithOffset(4), SlotAlign); |
20940 | std::pair<SDValue, SDValue> Tmp = |
20941 | BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG); |
20942 | if (IsStrict) |
20943 | return DAG.getMergeValues({Tmp.first, Tmp.second}, dl); |
20944 | |
20945 | return Tmp.first; |
20946 | } |
20947 | |
20948 | assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); |
20949 | SDValue ValueToStore = Src; |
20950 | if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) { |
20951 | |
20952 | |
20953 | |
// On 32-bit targets, store the i64 via an f64 bitcast so a single 64-bit
// SSE store is used rather than a pair of 32-bit stores.
20954 | ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); |
20955 | } |
20956 | SDValue Store = |
20957 | DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign); |
20958 | |
20959 | |
20960 | |
// FILD treats the i64 as signed; compute at f80 precision so the
// correction below stays exact.
20961 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
20962 | SDValue Ops[] = { Store, StackSlot }; |
20963 | SDValue Fild = |
20964 | DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI, |
20965 | SlotAlign, MachineMemOperand::MOLoad); |
20966 | Chain = Fild.getValue(1); |
20967 | |
20968 | |
20969 | |
// If the i64 was "negative" as signed (i.e. bit 63 set), FILD undershot by
// exactly 2^64; detect that with a signed compare against zero.
20970 | SDValue SignSet = DAG.getSetCC( |
20971 | dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64), |
20972 | Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); |
20973 | |
20974 | |
// Constant-pool pair {0.0f, 2^64f}: the low 4 bytes are 0x00000000 and the
// high 4 bytes 0x5F800000 (float 2^64).
20975 | APInt FF(64, 0x5F80000000000000ULL); |
20976 | SDValue FudgePtr = DAG.getConstantPool( |
20977 | ConstantInt::get(*DAG.getContext(), FF), PtrVT); |
20978 | Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign(); |
20979 | |
20980 | |
// Select offset 0 (adds 0.0) or 4 (adds 2^64) based on the sign bit.
20981 | SDValue Zero = DAG.getIntPtrConstant(0, dl); |
20982 | SDValue Four = DAG.getIntPtrConstant(4, dl); |
20983 | SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); |
20984 | FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); |
20985 | |
20986 | |
20987 | SDValue Fudge = DAG.getExtLoad( |
20988 | ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr, |
20989 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, |
20990 | CPAlignment); |
20991 | Chain = Fudge.getValue(1); |
20992 | |
20993 | |
// result = fild(i64 as signed) + fudge, then round to the requested type.
20994 | if (IsStrict) { |
20995 | SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other}, |
20996 | {Chain, Fild, Fudge}); |
20997 | |
20998 | if (DstVT == MVT::f80) |
20999 | return Add; |
21000 | return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other}, |
21001 | {Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)}); |
21002 | } |
21003 | SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); |
21004 | return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, |
21005 | DAG.getIntPtrConstant(0, dl)); |
21006 | } |
21007 | |
21008 | |
21009 | |
21010 | |
21011 | |
21012 | |
21013 | |
// Lower FP_TO_SINT/FP_TO_UINT (and their strict variants) for scalar f32/
// f64/f80 sources by storing through the stack with FP_TO_INT_IN_MEM.
// Unsigned i64 results are produced from the signed conversion plus an
// explicit threshold adjustment. Updates `Chain` with the final load chain.
21014 | SDValue |
21015 | X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, |
21016 | bool IsSigned, SDValue &Chain) const { |
21017 | bool IsStrict = Op->isStrictFPOpcode(); |
21018 | SDLoc DL(Op); |
21019 | |
21020 | EVT DstTy = Op.getValueType(); |
21021 | SDValue Value = Op.getOperand(IsStrict ? 1 : 0); |
21022 | EVT TheVT = Value.getValueType(); |
21023 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
21024 | |
// Only x87-compatible source types are handled here; others (e.g. f16,
// f128) are left for different lowering paths.
21025 | if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) { |
21026 | |
21027 | |
21028 | return SDValue(); |
21029 | } |
21030 | |
21031 | |
21032 | |
21033 | |
// Unsigned i64 needs the compare/subtract/XOR fixup below; unsigned i32
// is instead done as a signed i64 conversion (the result fits).
21034 | bool UnsignedFixup = !IsSigned && DstTy == MVT::i64; |
21035 | |
21036 | |
21037 | |
21038 | if (!IsSigned && DstTy != MVT::i64) { |
21039 | |
21040 | |
21041 | assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); |
21042 | DstTy = MVT::i64; |
21043 | } |
21044 | |
21045 | assert(DstTy.getSimpleVT() <= MVT::i64 && |
21046 | DstTy.getSimpleVT() >= MVT::i16 && |
21047 | "Unknown FP_TO_INT to lower!"); |
21048 | |
21049 | |
// Stack slot that FP_TO_INT_IN_MEM writes and the final load reads.
21050 | |
21051 | MachineFunction &MF = DAG.getMachineFunction(); |
21052 | unsigned MemSize = DstTy.getStoreSize(); |
21053 | int SSFI = |
21054 | MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false); |
21055 | SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); |
21056 | |
21057 | Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); |
21058 | |
21059 | SDValue Adjust; |
21060 | |
21061 | if (UnsignedFixup) { |
21062 | |
// FP_TO_UINT i64 via signed conversion:
//   if (Value >= 2^63) { Value -= 2^63; Adjust = 2^63; } else Adjust = 0;
//   result = (i64)fp_to_sint(Value) ^ Adjust
// The XOR (rather than ADD) of 2^63 restores the high bit without risking
// signed-overflow issues. The threshold 0x5f000000 is the float 2^63,
// converted exactly below to the actual source type.
21063 | |
21064 | |
21065 | |
21066 | |
21067 | |
21068 | |
21069 | |
21070 | |
21071 | |
21072 | |
21073 | |
21074 | |
21075 | |
21076 | |
21077 | |
21078 | |
21079 | APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); |
21080 | LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; |
21081 | bool LosesInfo = false; |
21082 | if (TheVT == MVT::f64) |
21083 | |
21084 | Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, |
21085 | &LosesInfo); |
21086 | else if (TheVT == MVT::f80) |
21087 | Status = Thresh.convert(APFloat::x87DoubleExtended(), |
21088 | APFloat::rmNearestTiesToEven, &LosesInfo); |
21089 | |
// 2^63 is exactly representable in all three formats.
21090 | assert(Status == APFloat::opOK && !LosesInfo && |
21091 | "FP conversion should have been exact"); |
21092 | |
21093 | SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); |
21094 | |
21095 | EVT ResVT = getSetCCResultType(DAG.getDataLayout(), |
21096 | *DAG.getContext(), TheVT); |
21097 | SDValue Cmp; |
21098 | if (IsStrict) { |
// Strict compare threads the chain; the `true` selects signaling
// semantics per the getSetCC overload used here.
21099 | Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE, Chain, |
21100 | true); |
21101 | Chain = Cmp.getValue(1); |
21102 | } else { |
21103 | Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE); |
21104 | } |
21105 | |
21106 | |
21107 | |
21108 | |
21109 | |
21110 | |
21111 | |
21112 | |
21113 | |
21114 | |
21115 | |
21116 | |
// Adjust = Cmp ? (1 << 63) : 0, built as zext + shift.
21117 | SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Cmp); |
21118 | SDValue Const63 = DAG.getConstant(63, DL, MVT::i8); |
21119 | Adjust = DAG.getNode(ISD::SHL, DL, MVT::i64, Zext, Const63); |
21120 | |
// FltOfs = Cmp ? 2^63 : 0.0, subtracted from the value so the signed
// conversion below stays in range.
21121 | SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp, ThreshVal, |
21122 | DAG.getConstantFP(0.0, DL, TheVT)); |
21123 | |
21124 | if (IsStrict) { |
21125 | Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other}, |
21126 | { Chain, Value, FltOfs }); |
21127 | Chain = Value.getValue(1); |
21128 | } else |
21129 | Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs); |
21130 | } |
21131 | |
21132 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI); |
21133 | |
21134 | |
// If the value lives in an SSE register, FP_TO_INT_IN_MEM (an x87 FIST)
// cannot read it directly: spill it and reload onto the x87 stack via FLD.
21135 | |
21136 | if (isScalarFPTypeInSSEReg(TheVT)) { |
21137 | assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); |
21138 | Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI); |
21139 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
21140 | SDValue Ops[] = { Chain, StackSlot }; |
21141 | |
21142 | unsigned FLDSize = TheVT.getStoreSize(); |
21143 | assert(FLDSize <= MemSize && "Stack slot not big enough"); |
21144 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
21145 | MPI, MachineMemOperand::MOLoad, FLDSize, Align(FLDSize)); |
21146 | Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO); |
21147 | Chain = Value.getValue(1); |
21148 | } |
21149 | |
21150 | |
// Truncating FIST store of the (possibly offset-adjusted) value.
21151 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
21152 | MPI, MachineMemOperand::MOStore, MemSize, Align(MemSize)); |
21153 | SDValue Ops[] = { Chain, Value, StackSlot }; |
21154 | SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL, |
21155 | DAG.getVTList(MVT::Other), |
21156 | Ops, DstTy, MMO); |
21157 | |
21158 | SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI); |
21159 | Chain = Res.getValue(1); |
21160 | |
21161 | |
// Restore the top bit for the unsigned-i64 case (see comment above).
21162 | if (UnsignedFixup) |
21163 | Res = DAG.getNode(ISD::XOR, DL, MVT::i64, Res, Adjust); |
21164 | |
21165 | return Res; |
21166 | } |
21167 | |
// Lower vector ANY_EXTEND/ZERO_EXTEND on AVX targets. Without AVX2's native
// 256-bit extends, the result is built from two 128-bit halves: an
// EXTEND_VECTOR_INREG of the low half and an unpckh-based high half.
21168 | static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, |
21169 | const X86Subtarget &Subtarget) { |
21170 | MVT VT = Op.getSimpleValueType(); |
21171 | SDValue In = Op.getOperand(0); |
21172 | MVT InVT = In.getSimpleValueType(); |
21173 | SDLoc dl(Op); |
21174 | unsigned Opc = Op.getOpcode(); |
21175 | |
21176 | assert(VT.isVector() && InVT.isVector() && "Expected vector type"); |
21177 | assert((Opc == ISD::ANY_EXTEND || Opc == ISD::ZERO_EXTEND) && |
21178 | "Unexpected extension opcode"); |
21179 | assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && |
21180 | "Expected same number of elements"); |
21181 | assert((VT.getVectorElementType() == MVT::i16 || |
21182 | VT.getVectorElementType() == MVT::i32 || |
21183 | VT.getVectorElementType() == MVT::i64) && |
21184 | "Unexpected element type"); |
21185 | assert((InVT.getVectorElementType() == MVT::i8 || |
21186 | InVT.getVectorElementType() == MVT::i16 || |
21187 | InVT.getVectorElementType() == MVT::i32) && |
21188 | "Unexpected element type"); |
21189 | |
21190 | unsigned ExtendInVecOpc = getOpcode_EXTEND_VECTOR_INREG(Opc); |
21191 | |
// v32i8 -> v32i16 without BWI must be split into two 256-bit halves.
21192 | if (VT == MVT::v32i16 && !Subtarget.hasBWI()) { |
21193 | assert(InVT == MVT::v32i8 && "Unexpected VT!"); |
21194 | return splitVectorIntUnary(Op, DAG); |
21195 | } |
21196 | |
// AVX2 has native 256-bit extends; keep the node for isel.
21197 | if (Subtarget.hasInt256()) |
21198 | return Op; |
21199 | |
21200 | |
21201 | |
21202 | |
21203 | |
21204 | |
21205 | |
21206 | |
21207 | |
21208 | |
21209 | |
21210 | |
21211 | |
// Low half: in-register extend of the lower input elements.
21212 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
21213 | SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In); |
21214 | |
21215 | |
// If the input is a shuffle whose two halves are identical, the low-half
// extend already covers both halves of the result.
21216 | |
21217 | if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In)) |
21218 | if (hasIdenticalHalvesShuffleMask(Shuf->getMask())) |
21219 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo); |
21220 | |
// High half: unpckh against zero (zext) or undef (anyext) doubles the
// element width of the upper input elements.
21221 | SDValue ZeroVec = DAG.getConstant(0, dl, InVT); |
21222 | SDValue Undef = DAG.getUNDEF(InVT); |
21223 | bool NeedZero = Opc == ISD::ZERO_EXTEND; |
21224 | SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); |
21225 | OpHi = DAG.getBitcast(HalfVT, OpHi); |
21226 | |
21227 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); |
21228 | } |
21229 | |
21230 | |
21231 | static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In, |
21232 | const SDLoc &dl, SelectionDAG &DAG) { |
21233 | assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT."); |
21234 | SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, |
21235 | DAG.getIntPtrConstant(0, dl)); |
21236 | SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, |
21237 | DAG.getIntPtrConstant(8, dl)); |
21238 | Lo = DAG.getNode(ExtOpc, dl, MVT::v8i16, Lo); |
21239 | Hi = DAG.getNode(ExtOpc, dl, MVT::v8i16, Hi); |
21240 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i16, Lo, Hi); |
21241 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
21242 | } |
21243 | |
// Lower ZERO_EXTEND from a vXi1 mask vector: produce a 0/1 integer vector
// of the requested element type, via sext+shift for wide elements or a
// vselect of 1/0 for i8 elements, widening to 512 bits when VLX is absent.
21244 | static SDValue LowerZERO_EXTEND_Mask(SDValue Op, |
21245 | const X86Subtarget &Subtarget, |
21246 | SelectionDAG &DAG) { |
21247 | MVT VT = Op->getSimpleValueType(0); |
21248 | SDValue In = Op->getOperand(0); |
21249 | MVT InVT = In.getSimpleValueType(); |
21250 | assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!"); |
21251 | SDLoc DL(Op); |
21252 | unsigned NumElts = VT.getVectorNumElements(); |
21253 | |
21254 | |
// For elements wider than i8, zext(mask) == sext(mask) >> (bits-1):
// the sign-extend produces all-ones/all-zeros, and the logical shift
// turns all-ones into 1.
21255 | |
21256 | if (VT.getVectorElementType() != MVT::i8) { |
21257 | SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In); |
21258 | return DAG.getNode(ISD::SRL, DL, VT, Extend, |
21259 | DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); |
21260 | } |
21261 | |
21262 | |
// i8-element path. Without BWI there is no vXi8 vselect on masks, so
// select at i32 granularity and truncate afterwards.
21263 | MVT ExtVT = VT; |
21264 | if (!Subtarget.hasBWI()) { |
21265 | |
// 16 i32 elements would need a 512-bit DQ op; split if unavailable.
21266 | if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) |
21267 | return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG); |
21268 | |
21269 | ExtVT = MVT::getVectorVT(MVT::i32, NumElts); |
21270 | } |
21271 | |
21272 | |
// Without VLX, mask selects only exist at 512 bits: widen the mask (upper
// lanes undef) and the select type, then extract the result back out.
21273 | MVT WideVT = ExtVT; |
21274 | if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { |
21275 | NumElts *= 512 / ExtVT.getSizeInBits(); |
21276 | InVT = MVT::getVectorVT(MVT::i1, NumElts); |
21277 | In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), |
21278 | In, DAG.getIntPtrConstant(0, DL)); |
21279 | WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), |
21280 | NumElts); |
21281 | } |
21282 | |
21283 | SDValue One = DAG.getConstant(1, DL, WideVT); |
21284 | SDValue Zero = DAG.getConstant(0, DL, WideVT); |
21285 | |
21286 | SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero); |
21287 | |
21288 | |
// Truncate back down to i8 elements if we selected at i32.
21289 | if (VT != ExtVT) { |
21290 | WideVT = MVT::getVectorVT(MVT::i8, NumElts); |
21291 | SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal); |
21292 | } |
21293 | |
21294 | |
// Extract the original-width subvector if we widened for the select.
21295 | if (WideVT != VT) |
21296 | SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal, |
21297 | DAG.getIntPtrConstant(0, DL)); |
21298 | |
21299 | return SelectedVal; |
21300 | } |
21301 | |
21302 | static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, |
21303 | SelectionDAG &DAG) { |
21304 | SDValue In = Op.getOperand(0); |
21305 | MVT SVT = In.getSimpleValueType(); |
21306 | |
21307 | if (SVT.getVectorElementType() == MVT::i1) |
21308 | return LowerZERO_EXTEND_Mask(Op, Subtarget, DAG); |
21309 | |
21310 | assert(Subtarget.hasAVX() && "Expected AVX support"); |
21311 | return LowerAVXExtend(Op, DAG, Subtarget); |
21312 | } |
21313 | |
21314 | |
21315 | |
21316 | |
21317 | |
21318 | |
// Truncate a vector using PACKSS/PACKUS, halving the element width per step
// and recursing until DstVT is reached. Requires the caller to have proved
// the values fit (sign bits for PACKSS, zero bits for PACKUS). Returns an
// empty SDValue when the shapes are unsupported.
21319 | static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, |
21320 | const SDLoc &DL, SelectionDAG &DAG, |
21321 | const X86Subtarget &Subtarget) { |
21322 | assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) && |
21323 | "Unexpected PACK opcode"); |
21324 | assert(DstVT.isVector() && "VT not a vector?"); |
21325 | |
// PACK instructions require SSE2.
21326 | |
21327 | if (!Subtarget.hasSSE2()) |
21328 | return SDValue(); |
21329 | |
21330 | EVT SrcVT = In.getValueType(); |
21331 | |
// Recursion base case: nothing left to truncate.
21332 | |
21333 | if (SrcVT == DstVT) |
21334 | return In; |
21335 | |
21336 | |
// Only handle destinations that are a whole number of 64-bit chunks and
// sources that are a whole number of 128-bit registers.
21337 | |
21338 | unsigned DstSizeInBits = DstVT.getSizeInBits(); |
21339 | unsigned SrcSizeInBits = SrcVT.getSizeInBits(); |
21340 | if ((DstSizeInBits % 64) != 0 || (SrcSizeInBits % 128) != 0) |
21341 | return SDValue(); |
21342 | |
21343 | unsigned NumElems = SrcVT.getVectorNumElements(); |
21344 | if (!isPowerOf2_32(NumElems)) |
21345 | return SDValue(); |
21346 | |
21347 | LLVMContext &Ctx = *DAG.getContext(); |
21348 | assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation"); |
21349 | assert(SrcSizeInBits > DstSizeInBits && "Illegal truncation"); |
21350 | |
// Element type after one halving step.
21351 | EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2); |
21352 | |
21353 | |
// Pick the pack width: 32->16 needs PACKSSDW/PACKUSDW (the latter is
// SSE4.1-only); otherwise pack 16->8.
21354 | |
21355 | EVT InVT = MVT::i16, OutVT = MVT::i8; |
21356 | if (SrcVT.getScalarSizeInBits() > 16 && |
21357 | (Opcode == X86ISD::PACKSS || Subtarget.hasSSE41())) { |
21358 | InVT = MVT::i32; |
21359 | OutVT = MVT::i16; |
21360 | } |
21361 | |
21362 | |
// 128-bit source: pack against undef and keep the low 64 bits.
21363 | if (SrcVT.is128BitVector()) { |
21364 | InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits()); |
21365 | OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits()); |
21366 | In = DAG.getBitcast(InVT, In); |
21367 | SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT)); |
21368 | Res = extractSubVector(Res, 0, DAG, DL, 64); |
21369 | return DAG.getBitcast(DstVT, Res); |
21370 | } |
21371 | |
21372 | |
// Wider sources: split into halves and pack them together.
21373 | SDValue Lo, Hi; |
21374 | std::tie(Lo, Hi) = splitVector(In, DAG, DL); |
21375 | |
21376 | unsigned SubSizeInBits = SrcSizeInBits / 2; |
21377 | InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits()); |
21378 | OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits()); |
21379 | |
21380 | |
// 256 -> 128: a single PACK of the two halves gives the result directly.
21381 | if (SrcVT.is256BitVector() && DstVT.is128BitVector()) { |
21382 | Lo = DAG.getBitcast(InVT, Lo); |
21383 | Hi = DAG.getBitcast(InVT, Hi); |
21384 | SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi); |
21385 | return DAG.getBitcast(DstVT, Res); |
21386 | } |
21387 | |
21388 | |
// 512 -> 256 with AVX2: 256-bit PACK operates per 128-bit lane, so the
// lanes come out interleaved and must be put back in order.
21389 | |
21390 | if (SrcVT.is512BitVector() && Subtarget.hasInt256()) { |
21391 | Lo = DAG.getBitcast(InVT, Lo); |
21392 | Hi = DAG.getBitcast(InVT, Hi); |
21393 | SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi); |
21394 | |
21395 | |
21396 | |
// Reorder 64-bit chunks {0,2,1,3} (scaled to element granularity) to
// undo the per-lane interleaving.
21397 | |
21398 | SmallVector<int, 64> Mask; |
21399 | int Scale = 64 / OutVT.getScalarSizeInBits(); |
21400 | narrowShuffleMaskElts(Scale, { 0, 2, 1, 3 }, Mask); |
21401 | Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask); |
21402 | |
21403 | if (DstVT.is256BitVector()) |
21404 | return DAG.getBitcast(DstVT, Res); |
21405 | |
21406 | |
// Need a further truncation step: recurse on the packed result.
21407 | EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems); |
21408 | Res = DAG.getBitcast(PackedVT, Res); |
21409 | return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget); |
21410 | } |
21411 | |
21412 | |
// General case: recursively pack each half one step, reassemble, recurse.
21413 | assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater"); |
21414 | EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2); |
21415 | Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget); |
21416 | Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget); |
21417 | |
21418 | PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems); |
21419 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi); |
21420 | return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget); |
21421 | } |
21422 | |
// Lower TRUNCATE to a vXi1 mask vector: move the low source bit into the
// sign-bit position (when it isn't already known to be there) and compare
// to produce the mask.
21423 | static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, |
21424 | const X86Subtarget &Subtarget) { |
21425 | |
21426 | SDLoc DL(Op); |
21427 | MVT VT = Op.getSimpleValueType(); |
21428 | SDValue In = Op.getOperand(0); |
21429 | MVT InVT = In.getSimpleValueType(); |
21430 | |
21431 | assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type."); |
21432 | |
21433 | |
// Amount needed to shift the interesting (low) bit into the sign bit.
21434 | unsigned ShiftInx = InVT.getScalarSizeInBits() - 1; |
21435 | if (InVT.getScalarSizeInBits() <= 16) { |
21436 | if (Subtarget.hasBWI()) { |
// Skip the shift when the value is already sign-bit-replicated.
21437 | |
21438 | if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { |
21439 | |
21440 | |
// Shift as i16 elements: there is no vXi8 SHL, and the i8 sign bits
// land correctly anyway since we only read each element's own bits.
21441 | MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); |
21442 | In = DAG.getNode(ISD::SHL, DL, ExtVT, |
21443 | DAG.getBitcast(ExtVT, In), |
21444 | DAG.getConstant(ShiftInx, DL, ExtVT)); |
21445 | In = DAG.getBitcast(InVT, In); |
21446 | } |
// Mask bit = (0 > In), i.e. the sign bit of each element.
21447 | return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), |
21448 | In, ISD::SETGT); |
21449 | } |
21450 | |
// No BWI: must widen the elements before comparing.
21451 | assert((InVT.is256BitVector() || InVT.is128BitVector()) && |
21452 | "Unexpected vector type."); |
21453 | unsigned NumElts = InVT.getVectorNumElements(); |
21454 | assert((NumElts == 8 || NumElts == 16) && "Unexpected number of elements"); |
21455 | |
21456 | |
21457 | |
21458 | |
21459 | |
21460 | |
21461 | |
21462 | |
// 16 elements need v16i32, a 512-bit DQ op; without it, split into two
// v8 halves, truncate each to v8i1, and concatenate.
21463 | |
21464 | if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) { |
21465 | SDValue Lo, Hi; |
21466 | if (InVT == MVT::v16i8) { |
// v16i8: sign-extend-in-reg the low 8, shuffle the high 8 down first.
21467 | Lo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, In); |
21468 | Hi = DAG.getVectorShuffle( |
21469 | InVT, DL, In, In, |
21470 | {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}); |
21471 | Hi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, Hi); |
21472 | } else { |
21473 | assert(InVT == MVT::v16i16 && "Unexpected VT!"); |
21474 | Lo = extract128BitVector(In, 0, DAG, DL); |
21475 | Hi = extract128BitVector(In, 8, DAG, DL); |
21476 | } |
21477 | |
21478 | |
// Recursively truncate each v8 half to v8i1.
21479 | Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo); |
21480 | Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Hi); |
21481 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); |
21482 | } |
21483 | |
21484 | |
// Widen: i32 elements with VLX, otherwise whatever fills 512 bits.
21485 | |
21486 | MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts); |
21487 | MVT ExtVT = MVT::getVectorVT(EltVT, NumElts); |
21488 | In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); |
21489 | InVT = ExtVT; |
21490 | ShiftInx = InVT.getScalarSizeInBits() - 1; |
21491 | } |
21492 | |
// Move the low bit into the sign bit unless it is already replicated.
21493 | if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { |
21494 | |
21495 | In = DAG.getNode(ISD::SHL, DL, InVT, In, |
21496 | DAG.getConstant(ShiftInx, DL, InVT)); |
21497 | } |
21498 | |
// DQI prefers (0 > x); otherwise compare against zero for inequality
// (valid because only the sign bit can differ at this point).
21499 | if (Subtarget.hasDQI()) |
21500 | return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT); |
21501 | return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE); |
21502 | } |
21503 | |
21504 | SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { |
21505 | SDLoc DL(Op); |
21506 | MVT VT = Op.getSimpleValueType(); |
21507 | SDValue In = Op.getOperand(0); |
21508 | MVT InVT = In.getSimpleValueType(); |
21509 | unsigned InNumEltBits = InVT.getScalarSizeInBits(); |
21510 | |
21511 | assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && |
21512 | "Invalid TRUNCATE operation"); |
21513 | |
21514 | |
21515 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
21516 | if (!TLI.isTypeLegal(InVT)) { |
21517 | if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) && |
21518 | VT.is128BitVector()) { |
21519 | assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) && |
21520 | "Unexpected subtarget!"); |
21521 | |
21522 | |
21523 | |
21524 | SDValue Lo, Hi; |
21525 | std::tie(Lo, Hi) = DAG.SplitVector(In, DL); |
21526 | |
21527 | EVT LoVT, HiVT; |
21528 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
21529 | |
21530 | Lo = DAG.getNode(ISD::TRUNCATE, DL, LoVT, Lo); |
21531 | Hi = DAG.getNode(ISD::TRUNCATE, DL, HiVT, Hi); |
21532 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); |
21533 | } |
21534 | |
21535 | |
21536 | return SDValue(); |
21537 | } |
21538 | |
21539 | if (VT.getVectorElementType() == MVT::i1) |
21540 | return LowerTruncateVecI1(Op, DAG, Subtarget); |
21541 | |
21542 | |
21543 | if (Subtarget.hasAVX512()) { |
21544 | if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) { |
21545 | assert(VT == MVT::v32i8 && "Unexpected VT!"); |
21546 | return splitVectorIntUnary(Op, DAG); |
21547 | } |
21548 | |
21549 | |
21550 | |
21551 | |
21552 | |
21553 | if (InVT != MVT::v16i16 || Subtarget.hasBWI() || |
21554 | Subtarget.canExtendTo512DQ()) |
21555 | return Op; |
21556 | } |
21557 | |
21558 | unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16); |
21559 | unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; |
21560 | |
21561 | |
21562 | |
21563 | |
21564 | KnownBits Known = DAG.computeKnownBits(In); |
21565 | if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) |
21566 | if (SDValue V = |
21567 | truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget)) |
21568 | return V; |
21569 | |
21570 | |
21571 | |
21572 | if ((InNumEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In)) |
21573 | if (SDValue V = |
21574 | truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget)) |
21575 | return V; |
21576 | |
21577 | |
21578 | assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); |
21579 | |
21580 | if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { |
21581 | In = DAG.getBitcast(MVT::v8i32, In); |
21582 | |
21583 | |
21584 | if (Subtarget.hasInt256()) { |
21585 | static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; |
21586 | In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask); |
21587 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, |
21588 | DAG.getIntPtrConstant(0, DL)); |
21589 | } |
21590 | |
21591 | SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, |
21592 | DAG.getIntPtrConstant(0, DL)); |
21593 | SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, |
21594 | DAG.getIntPtrConstant(4, DL)); |
21595 | static const int ShufMask[] = {0, 2, 4, 6}; |
21596 | return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); |
21597 | } |
21598 | |
21599 | if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { |
21600 | In = DAG.getBitcast(MVT::v32i8, In); |
21601 | |
21602 | |
21603 | if (Subtarget.hasInt256()) { |
21604 | |
21605 | static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13, |
21606 | -1, -1, -1, -1, -1, -1, -1, -1, |
21607 | 16, 17, 20, 21, 24, 25, 28, 29, |
21608 | -1, -1, -1, -1, -1, -1, -1, -1 }; |
21609 | In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1); |
21610 | In = DAG.getBitcast(MVT::v4i64, In); |
21611 | |
21612 | static const int ShufMask2[] = {0, 2, -1, -1}; |
21613 | In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); |
21614 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, |
21615 | DAG.getBitcast(MVT::v16i16, In), |
21616 | DAG.getIntPtrConstant(0, DL)); |
21617 | } |
21618 | |
21619 | SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In, |
21620 | DAG.getIntPtrConstant(0, DL)); |
21621 | SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In, |
21622 | DAG.getIntPtrConstant(16, DL)); |
21623 | |
21624 | |
21625 | static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, |
21626 | -1, -1, -1, -1, -1, -1, -1, -1}; |
21627 | |
21628 | OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1); |
21629 | OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1); |
21630 | |
21631 | OpLo = DAG.getBitcast(MVT::v4i32, OpLo); |
21632 | OpHi = DAG.getBitcast(MVT::v4i32, OpHi); |
21633 | |
21634 | |
21635 | static const int ShufMask2[] = {0, 1, 4, 5}; |
21636 | SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2); |
21637 | return DAG.getBitcast(MVT::v8i16, res); |
21638 | } |
21639 | |
21640 | if (VT == MVT::v16i8 && InVT == MVT::v16i16) { |
21641 | |
21642 | In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT)); |
21643 | |
21644 | SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In, |
21645 | DAG.getIntPtrConstant(0, DL)); |
21646 | SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In, |
21647 | DAG.getIntPtrConstant(8, DL)); |
21648 | return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi); |
21649 | } |
21650 | |
21651 | llvm_unreachable("All 256->128 cases should have been handled above!"); |
21652 | } |
21653 | |
21654 | |
21655 | |
/// Expand a vector float-to-unsigned-i32 conversion using only signed
/// conversions (CVTTP2SI), for targets without a native unsigned convert.
/// Computes the result both directly and after subtracting 2^31, then merges
/// the two based on whether the direct conversion overflowed.
static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl,
                                    SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT SrcVT = Src.getSimpleValueType();
  unsigned DstBits = VT.getScalarSizeInBits();
  assert(DstBits == 32 && "expandFP_TO_UINT_SSE - only vXi32 supported");

  // Calculate the converted result for values in the range 0 to
  // 2^31-1 ("Small") and 2^31 to 2^32-1 ("Big", biased down by 2^31 = 2147483648.0).
  SDValue Small = DAG.getNode(X86ISD::CVTTP2SI, dl, VT, Src);
  SDValue Big =
      DAG.getNode(X86ISD::CVTTP2SI, dl, VT,
                  DAG.getNode(ISD::FSUB, dl, SrcVT, Src,
                              DAG.getConstantFP(2147483648.0f, dl, SrcVT)));

  // CVTTP2SI sets the sign bit of each lane when the input was out of signed
  // range, so Small's sign bit tells us which lanes must take the "Big" path.
  //
  // AVX1 has no 256-bit integer shift, so for v8i32 without AVX2 select with
  // BLENDV keyed on Small's sign bit instead of the sign-splat masking below.
  // NOTE(review): the OR of Small|Big as the "overflow" value appears to rely
  // on Small being INT_MIN (0x80000000) in overflowed lanes — confirm against
  // the CVTTPS2DQ "integer indefinite" behavior.
  if (VT == MVT::v8i32 && !Subtarget.hasAVX2()) {
    SDValue Overflow = DAG.getNode(ISD::OR, dl, VT, Small, Big);
    return DAG.getNode(X86ISD::BLENDV, dl, VT, Small, Overflow, Small);
  }

  // Splat Small's sign bit across each lane: all-ones where overflowed,
  // all-zeros otherwise. Result is Small, or (0x80000000 | Big) on overflow.
  SDValue IsOverflown =
      DAG.getNode(X86ISD::VSRAI, dl, VT, Small,
                  DAG.getTargetConstant(DstBits - 1, dl, MVT::i8));
  return DAG.getNode(ISD::OR, dl, VT, Small,
                     DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));
}
21691 | |
/// Lower (STRICT_)FP_TO_SINT / (STRICT_)FP_TO_UINT for both vector and scalar
/// result types. Returns the original Op when it is already legal, an empty
/// SDValue to fall back to default/legalizer expansion, or a lowered sequence.
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  MVT VT = Op->getSimpleValueType(0);
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  // Strict ops carry an incoming chain in operand 0.
  SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
  MVT SrcVT = Src.getSimpleValueType();
  SDLoc dl(Op);

  SDValue Res;
  if (VT.isVector()) {
    // v2f64 -> v2i1: convert to a wider integer type, truncate to i1 mask,
    // then extract the low two mask elements.
    if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
      MVT ResVT = MVT::v4i32;
      MVT TruncVT = MVT::v4i1;
      unsigned Opc;
      if (IsStrict)
        Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
      else
        Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;

      // Without VLX the unsigned convert only exists on 512-bit registers:
      // widen the source to v8f64 and use the generic opcode instead.
      if (!IsSigned && !Subtarget.hasVLX()) {
        assert(Subtarget.useAVX512Regs() && "Unexpected features!");

        ResVT = MVT::v8i32;
        TruncVT = MVT::v8i1;
        Opc = Op.getOpcode();

        // For strict ops pad with zero (not undef) so the garbage lanes
        // cannot raise spurious FP exceptions.
        SDValue Tmp = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v8f64)
                               : DAG.getUNDEF(MVT::v8f64);
        Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
                          DAG.getIntPtrConstant(0, dl));
      }
      if (IsStrict) {
        Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(Opc, dl, ResVT, Src);
      }

      Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
                        DAG.getIntPtrConstant(0, dl));
      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // f16 vector sources with AVX512-FP16 support.
    if (Subtarget.hasFP16() && SrcVT.getVectorElementType() == MVT::f16) {
      // vXi16 results are directly legal.
      if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
        return Op;

      MVT ResVT = VT;
      MVT EleVT = VT.getVectorElementType();
      // Narrow element results go through a 128-bit intermediate type.
      if (EleVT != MVT::i64)
        ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;

      // Widen sub-v8f16 sources up to v8f16 (zero padding for strict ops so
      // the unused lanes cannot raise exceptions).
      if (SrcVT != MVT::v8f16) {
        SDValue Tmp =
            IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
        SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
        Ops[0] = Src;
        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
      }

      if (IsStrict) {
        Res = DAG.getNode(IsSigned ? X86ISD::STRICT_CVTTP2SI
                                   : X86ISD::STRICT_CVTTP2UI,
                          dl, {ResVT, MVT::Other}, {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl,
                          ResVT, Src);
      }

      // Truncate to the requested sub-i16 element type, then extract the
      // original number of elements.
      if (EleVT.getSizeInBits() < 16) {
        ResVT = MVT::getVectorVT(EleVT, 8);
        Res = DAG.getNode(ISD::TRUNCATE, dl, ResVT, Res);
      }

      if (ResVT != VT)
        Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
                          DAG.getIntPtrConstant(0, dl));

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // v8fXX -> v8i16: convert to v8i32 first, then truncate.
    if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
      if (IsStrict) {
        Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
                                   : ISD::STRICT_FP_TO_UINT,
                          dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
                          MVT::v8i32, Src);
      }

      // TRUNCATE is lowered/combined elsewhere; no chain needed for it.
      Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // v8f64 -> v8i32 unsigned is directly legal with AVX512F on 512-bit regs.
    if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
      assert(!IsSigned && "Expected unsigned conversion!");
      assert(Subtarget.useAVX512Regs() && "Requires avx512f");
      return Op;
    }

    // Unsigned narrow-vector cases on AVX512 without VLX: widen source and
    // result to 512 bits, convert, then extract the low subvector.
    if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&
        (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32) &&
        Subtarget.useAVX512Regs()) {
      assert(!IsSigned && "Expected unsigned conversion!");
      assert(!Subtarget.hasVLX() && "Unexpected features!");
      MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
      MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;

      // Zero-pad (strict) / undef-pad (non-strict) the upper source lanes.
      SDValue Tmp =
          IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                        DAG.getIntPtrConstant(0, dl));

      if (IsStrict) {
        Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
                          {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
      }

      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
                        DAG.getIntPtrConstant(0, dl));

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // 64-bit element results need AVX512DQ; without VLX widen to 512 bits,
    // convert to v8i64, then extract.
    if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&
        (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32) &&
        Subtarget.useAVX512Regs() && Subtarget.hasDQI()) {
      assert(!Subtarget.hasVLX() && "Unexpected features!");
      MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;

      // Zero-pad (strict) / undef-pad (non-strict) the upper source lanes.
      SDValue Tmp =
          IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                        DAG.getIntPtrConstant(0, dl));

      if (IsStrict) {
        Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
                          {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
      }

      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
                        DAG.getIntPtrConstant(0, dl));

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // v2f32 -> v2i64.
    if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
      if (!Subtarget.hasVLX()) {
        // Non-strict nodes are handled by a later widening; only strict nodes
        // must be lowered here to keep the chain intact.
        if (!IsStrict)
          return SDValue();

        // Pad to v8f32 with zeroes and use the 512-bit strict convert.
        SDValue Zero = DAG.getConstantFP(0.0, dl, MVT::v2f32);
        SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
                                  {Src, Zero, Zero, Zero});
        Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
                          {Chain, Tmp});
        SDValue Chain = Tmp.getValue(1);
        Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
                          DAG.getIntPtrConstant(0, dl));
        return DAG.getMergeValues({Tmp, Chain}, dl);
      }

      // With VLX: widen the source to v4f32 and use the X86-specific
      // CVTTP2SI/CVTTP2UI nodes, which only read the low two elements.
      assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL");
      SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                                DAG.getUNDEF(MVT::v2f32));
      if (IsStrict) {
        unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI
                                : X86ISD::STRICT_CVTTP2UI;
        return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
      }
      unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
      return DAG.getNode(Opc, dl, VT, Tmp);
    }

    // Remaining unsigned vXi32 cases: expand using two signed conversions.
    // NOTE: not strict-safe, so only reached for non-strict nodes.
    if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) ||
        (VT == MVT::v4i32 && SrcVT == MVT::v4f64) ||
        (VT == MVT::v8i32 && SrcVT == MVT::v8f32)) {
      assert(!IsSigned && "Expected unsigned conversion!");
      return expandFP_TO_UINT_SSE(VT, Src, dl, DAG, Subtarget);
    }

    return SDValue();
  }

  assert(!VT.isVector());

  bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);

  if (!IsSigned && UseSSEReg) {
    // AVX512 has native unsigned scalar conversions.
    if (Subtarget.hasAVX512())
      return Op;

    // Promote i32->unsigned on 32-bit and i64->unsigned on 64-bit by doing
    // two signed conversions (of Src and Src - 2^SatBits) and selecting via
    // the sign bit, mirroring expandFP_TO_UINT_SSE for scalars. Not valid
    // for strict nodes (the FSUB/convert pair isn't exception-safe).
    if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) ||
                      (VT == MVT::i64 && Subtarget.is64Bit()))) {
      unsigned DstBits = VT.getScalarSizeInBits();
      APInt UIntLimit = APInt::getSignMask(DstBits);
      SDValue FloatOffset = DAG.getNode(ISD::UINT_TO_FP, dl, SrcVT,
                                        DAG.getConstant(UIntLimit, dl, VT));
      MVT SrcVecVT = MVT::getVectorVT(SrcVT, 128 / SrcVT.getScalarSizeInBits());

      // Small handles inputs below 2^(DstBits-1); Big handles the rest after
      // biasing the input down by 2^(DstBits-1).
      SDValue Small =
          DAG.getNode(X86ISD::CVTTS2SI, dl, VT,
                      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT, Src));
      SDValue Big = DAG.getNode(
          X86ISD::CVTTS2SI, dl, VT,
          DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT,
                      DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FloatOffset)));

      // Splat Small's sign bit: the signed convert sets it exactly when the
      // input was out of signed range, selecting Big (with the top bit set)
      // for large inputs.
      SDValue IsOverflown = DAG.getNode(
          ISD::SRA, dl, VT, Small, DAG.getConstant(DstBits - 1, dl, MVT::i8));
      return DAG.getNode(ISD::OR, dl, VT, Small,
                         DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));
    }

    // fp -> unsigned i64 without AVX512: let the default expansion handle it.
    if (VT == MVT::i64)
      return SDValue();

    assert(VT == MVT::i32 && "Unexpected VT!");

    // On 64-bit targets, promote unsigned i32 to a signed i64 conversion and
    // truncate the result.
    if (Subtarget.is64Bit()) {
      if (IsStrict) {
        Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i64, MVT::Other},
                          {Chain, Src});
        Chain = Res.getValue(1);
      } else
        Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);

      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // 32-bit target without SSE3: fall back to default expansion. With SSE3
    // we can fall through to the FP_TO_INTHelper path (fisttp) below.
    if (!Subtarget.hasSSE3())
      return SDValue();
  }

  // i16 results: promote to a signed i32 conversion and truncate.
  // (FP_TO_UINT i16 has already been promoted by this point.)
  if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
    assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
    if (IsStrict) {
      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other},
                        {Chain, Src});
      Chain = Res.getValue(1);
    } else
      Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);

    Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    if (IsStrict)
      return DAG.getMergeValues({Res, Chain}, dl);
    return Res;
  }

  // Signed conversions from SSE registers are directly legal.
  if (UseSSEReg && IsSigned)
    return Op;

  // f128 sources always go through a libcall.
  if (SrcVT == MVT::f128) {
    RTLIB::Libcall LC;
    if (IsSigned)
      LC = RTLIB::getFPTOSINT(SrcVT, VT);
    else
      LC = RTLIB::getFPTOUINT(SrcVT, VT);

    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
                                                  SDLoc(Op), Chain);

    if (IsStrict)
      return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);

    return Tmp.first;
  }

  // Everything else: x87-based conversion through a stack slot.
  // FP_TO_INTHelper updates Chain in place for strict nodes.
  if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
    if (IsStrict)
      return DAG.getMergeValues({V, Chain}, dl);
    return V;
  }

  llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
22034 | |
22035 | SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op, |
22036 | SelectionDAG &DAG) const { |
22037 | SDValue Src = Op.getOperand(0); |
22038 | MVT SrcVT = Src.getSimpleValueType(); |
22039 | |
22040 | |
22041 | if (isScalarFPTypeInSSEReg(SrcVT)) |
22042 | return Op; |
22043 | |
22044 | return LRINT_LLRINTHelper(Op.getNode(), DAG); |
22045 | } |
22046 | |
/// Lower LRINT/LLRINT through the x87 unit: push the source onto the x87
/// stack (via memory if it lives in an SSE register), store it to an integer
/// stack slot with FIST (which rounds using the current rounding mode), and
/// reload the integer result.
SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
                                              SelectionDAG &DAG) const {
  EVT DstVT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
    // Unsupported source type (e.g. f16/f128): let the caller expand it.
    return SDValue();
  }

  SDLoc DL(N);
  SDValue Chain = DAG.getEntryNode();

  bool UseSSE = isScalarFPTypeInSSEReg(SrcVT);

  // One stack slot is reused for both the FP spill and the integer result,
  // so size it for the larger of the two.
  EVT OtherVT = UseSSE ? SrcVT : DstVT;
  SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);

  if (UseSSE) {
    assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
    // Spill the SSE value and reload it onto the x87 stack with FLD.
    Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
    SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
    SDValue Ops[] = { Chain, StackPtr };

    Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
                                  None, MachineMemOperand::MOLoad);
    Chain = Src.getValue(1);
  }

  // FIST stores the rounded integer to the stack slot.
  SDValue StoreOps[] = { Chain, Src, StackPtr };
  Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other),
                                  StoreOps, DstVT, MPI, None,
                                  MachineMemOperand::MOStore);

  return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
}
22090 | |
/// Lower FP_TO_SINT_SAT / FP_TO_UINT_SAT for scalar SSE sources by clamping
/// the input to the representable range before converting, plus explicit
/// selects for the cases the clamp alone cannot make correct (NaN, inexact
/// bounds). Returns an empty SDValue to use generic expansion otherwise.
SDValue
X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  unsigned FpToIntOpcode = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // Three widths are in play: SatWidth (the saturation bound from operand 1),
  // DstWidth (the result type), and TmpWidth (the intermediate conversion
  // type, possibly a promotion of DstVT).
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT TmpVT = DstVT;

  // Only scalar f32/f64 in SSE registers are handled here; everything else
  // goes through the generic expansion.
  if (!isScalarFPTypeInSSEReg(SrcVT))
    return SDValue();

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  unsigned TmpWidth = TmpVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
         "Expected saturation width smaller than result width");

  // Promote the conversion to at least 32 bits; smaller conversions aren't
  // natively available.
  if (TmpWidth < 32) {
    TmpVT = MVT::i32;
    TmpWidth = 32;
  }

  // On 64-bit targets, do unsigned-32-bit saturation via a signed 64-bit
  // conversion, which is native.
  if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
    TmpVT = MVT::i64;
    TmpWidth = 64;
  }

  // When the saturation range fits strictly inside the conversion type, the
  // (native) signed conversion can be used even for unsigned saturation.
  if (SatWidth < TmpWidth)
    FpToIntOpcode = ISD::FP_TO_SINT;

  // Integer saturation bounds, extended to the destination width.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
  }

  // The same bounds rounded toward zero into the source FP type. If the
  // rounding was exact, clamping in FP space is fully equivalent to clamping
  // the integer result.
  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
      MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
      MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
                             && !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // Fast path: clamp in FP space with FMAX/FMIN, then convert.
  if (AreExactFloatBounds) {
    if (DstVT != TmpVT) {
      // Clamp. Note X86ISD::FMAX/FMIN with the constant as the FIRST operand:
      // these ops return the second operand when either input is NaN, so a
      // NaN source yields MinFloat -> MaxFloat-clamped -> a defined result.
      SDValue MinClamped = DAG.getNode(
          X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
      SDValue BothClamped = DAG.getNode(
          X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);

      SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);

      // The clamped value fits in DstVT, so the truncate is lossless.
      return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
    }

    // DstVT == TmpVT: clamp with the constant as the SECOND operand (NaN
    // propagates through), and handle NaN with an explicit select below.
    SDValue MinClamped = DAG.getNode(
        X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
    // FMINC is commutative/NaN-agnostic; either operand order works here.
    SDValue BothClamped = DAG.getNode(
        X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);

    SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);

    if (!IsSigned) {
      // NaN was clamped to MinFloat (= 0 for unsigned), which is already the
      // required result for NaN, so no select is needed.
      return FpToInt;
    }

    // Signed: NaN must produce 0; detect it with an unordered self-compare.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(
        dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
  }

  // Slow path: convert first, then patch up out-of-range results with
  // explicit selects against the FP bounds.
  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src);

  if (DstVT != TmpVT) {
    // In-range values survive the truncation; out-of-range ones are replaced
    // by the selects below anyway.
    FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
  }

  SDValue Select = FpToInt;

  // Below-range (and, via the unordered compare, NaN) inputs -> MinInt.
  // A signed, full-width saturation can skip this: the native conversion
  // already produces MinInt for underflow.
  if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
    Select = DAG.getSelectCC(
        dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
  }

  // Above-range inputs -> MaxInt (ordered compare: NaN must not match).
  Select = DAG.getSelectCC(
      dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);

  // NaN handling: unsigned already mapped NaN to MinInt == 0 above, and a
  // truncated result had NaN replaced via the SETULT select, so only the
  // remaining signed full-width case needs the explicit NaN -> 0 select.
  if (!IsSigned || DstVT != TmpVT) {
    return Select;
  }

  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(
      dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
22240 | |
/// Lower (STRICT_)FP_EXTEND. Scalar f128 results become libcalls (empty
/// SDValue); f80 results are legal except f16->f80 which needs a libcall;
/// half/float vector sources are widened to 128 bits and converted with
/// X86ISD::VFPEXT.
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
  MVT SVT = In.getSimpleValueType();

  // Extensions to f128 are lowered to a libcall by the default legalizer.
  if (VT == MVT::f128)
    return SDValue();

  if (VT == MVT::f80) {
    if (SVT == MVT::f16) {
      // No instruction converts f16 directly to x87 f80: use __extendhfxf2.
      assert(Subtarget.hasFP16() && "Unexpected features!");
      RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
      MakeLibCallOptions CallOptions;
      std::pair<SDValue, SDValue> Tmp =
          makeLibCall(DAG, LC, VT, In, CallOptions, DL,
                      IsStrict ? Op.getOperand(0) : SDValue());
      if (IsStrict)
        return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
      else
        return Tmp.first;
    }
    // Other extensions to f80 are legal as-is.
    return Op;
  }

  // f16 vector sources: widen to v8f16 with undef padding and convert.
  // VFPEXT reads only the low elements it needs.
  if (SVT.getVectorElementType() == MVT::f16) {
    assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
    if (SVT == MVT::v2f16)
      In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
                       DAG.getUNDEF(MVT::v2f16));
    SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
                              DAG.getUNDEF(MVT::v4f16));
    if (IsStrict)
      return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
                         {Op->getOperand(0), Res});
    return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
  }

  assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");

  // v2f32 -> v2f64: widen the source to v4f32 and use VFPEXT, which converts
  // the low two elements.
  SDValue Res =
      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT));
  if (IsStrict)
    return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
                       {Op->getOperand(0), Res});
  return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
}
22290 | |
22291 | SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { |
22292 | bool IsStrict = Op->isStrictFPOpcode(); |
22293 | SDValue In = Op.getOperand(IsStrict ? 1 : 0); |
22294 | MVT VT = Op.getSimpleValueType(); |
22295 | MVT SVT = In.getSimpleValueType(); |
22296 | |
22297 | |
22298 | if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80)) |
22299 | return Op; |
22300 | |
22301 | return SDValue(); |
22302 | } |
22303 | |
22304 | static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) { |
22305 | bool IsStrict = Op->isStrictFPOpcode(); |
22306 | SDValue Src = Op.getOperand(IsStrict ? 1 : 0); |
22307 | assert(Src.getValueType() == MVT::i16 && Op.getValueType() == MVT::f32 && |
22308 | "Unexpected VT!"); |
22309 | |
22310 | SDLoc dl(Op); |
22311 | SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, |
22312 | DAG.getConstant(0, dl, MVT::v8i16), Src, |
22313 | DAG.getIntPtrConstant(0, dl)); |
22314 | |
22315 | SDValue Chain; |
22316 | if (IsStrict) { |
22317 | Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {MVT::v4f32, MVT::Other}, |
22318 | {Op.getOperand(0), Res}); |
22319 | Chain = Res.getValue(1); |
22320 | } else { |
22321 | Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res); |
22322 | } |
22323 | |
22324 | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, |
22325 | DAG.getIntPtrConstant(0, dl)); |
22326 | |
22327 | if (IsStrict) |
22328 | return DAG.getMergeValues({Res, Chain}, dl); |
22329 | |
22330 | return Res; |
22331 | } |
22332 | |
22333 | static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) { |
22334 | bool IsStrict = Op->isStrictFPOpcode(); |
22335 | SDValue Src = Op.getOperand(IsStrict ? 1 : 0); |
22336 | assert(Src.getValueType() == MVT::f32 && Op.getValueType() == MVT::i16 && |
22337 | "Unexpected VT!"); |
22338 | |
22339 | SDLoc dl(Op); |
22340 | SDValue Res, Chain; |
22341 | if (IsStrict) { |
22342 | Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4f32, |
22343 | DAG.getConstantFP(0, dl, MVT::v4f32), Src, |
22344 | DAG.getIntPtrConstant(0, dl)); |
22345 | Res = DAG.getNode( |
22346 | X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other}, |
22347 | {Op.getOperand(0), Res, DAG.getTargetConstant(4, dl, MVT::i32)}); |
22348 | Chain = Res.getValue(1); |
22349 | } else { |
22350 | |
22351 | Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, Src); |
22352 | Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res, |
22353 | DAG.getTargetConstant(4, dl, MVT::i32)); |
22354 | } |
22355 | |
22356 | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Res, |
22357 | DAG.getIntPtrConstant(0, dl)); |
22358 | |
22359 | if (IsStrict) |
22360 | return DAG.getMergeValues({Res, Chain}, dl); |
22361 | |
22362 | return Res; |
22363 | } |
22364 | |
22365 | |
22366 | |
/// Try to lower a scalar add/sub of two extracted vector elements to a
/// horizontal op (HADD/HSUB/FHADD/FHSUB) plus a single element extract.
/// Returns Op unchanged when the transform doesn't apply or isn't profitable.
static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  // If both extracts have other users, the vector source stays live anyway
  // and the transform is unlikely to pay off.
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  if (!LHS.hasOneUse() && !RHS.hasOneUse())
    return Op;

  // FP horizontal ops need SSE3; integer horizontal ops need SSSE3.
  bool IsFP = Op.getSimpleValueType().isFloatingPoint();
  if (IsFP && !Subtarget.hasSSE3())
    return Op;
  if (!IsFP && !Subtarget.hasSSSE3())
    return Op;

  // Both operands must be constant-index extracts from the SAME vector, and
  // the target heuristic must say horizontal ops are worthwhile.
  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      LHS.getOperand(0) != RHS.getOperand(0) ||
      !isa<ConstantSDNode>(LHS.getOperand(1)) ||
      !isa<ConstantSDNode>(RHS.getOperand(1)) ||
      !shouldUseHorizontalOp(true, DAG, Subtarget))
    return Op;

  unsigned HOpcode;
  switch (Op.getOpcode()) {
  case ISD::ADD: HOpcode = X86ISD::HADD; break;
  case ISD::SUB: HOpcode = X86ISD::HSUB; break;
  case ISD::FADD: HOpcode = X86ISD::FHADD; break;
  case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
  default:
    llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
  }
  // Addition is commutative, so allow the extracts in swapped (odd, even)
  // order; normalize to (even, odd). Subtraction cannot be commuted.
  unsigned LExtIndex = LHS.getConstantOperandVal(1);
  unsigned RExtIndex = RHS.getConstantOperandVal(1);
  if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
      (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
    std::swap(LExtIndex, RExtIndex);

  // Horizontal ops combine adjacent even/odd element pairs only.
  if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
    return Op;

  SDValue X = LHS.getOperand(0);
  EVT VecVT = X.getValueType();
  unsigned BitWidth = VecVT.getSizeInBits();
  unsigned NumLanes = BitWidth / 128;
  unsigned NumEltsPerLane = VecVT.getVectorNumElements() / NumLanes;
  assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
         "Not expecting illegal vector widths here");

  // Horizontal ops work within 128-bit lanes: for wider vectors, extract the
  // 128-bit lane containing the pair and rebase the index into that lane.
  SDLoc DL(Op);
  if (BitWidth == 256 || BitWidth == 512) {
    unsigned LaneIdx = LExtIndex / NumEltsPerLane;
    X = extract128BitVector(X, LaneIdx * NumEltsPerLane, DAG, DL);
    LExtIndex %= NumEltsPerLane;
  }

  // hop(x, x) places the sum of pair i at element i in the low half of the
  // result (and repeats it in the high half), so the answer for elements
  // (LExtIndex, LExtIndex+1) lands at index LExtIndex/2.
  SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
                     DAG.getIntPtrConstant(LExtIndex / 2, DL));
}
22436 | |
22437 | |
22438 | |
22439 | SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const { |
22440 | assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && |
22441 | "Only expecting float/double"); |
22442 | return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); |
22443 | } |
22444 | |
22445 | |
22446 | |
22447 | |
22448 | |
22449 | static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) { |
22450 | SDValue N0 = Op.getOperand(0); |
22451 | SDLoc dl(Op); |
22452 | MVT VT = Op.getSimpleValueType(); |
22453 | |
22454 | |
22455 | const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); |
22456 | bool Ignored; |
22457 | APFloat Point5Pred = APFloat(0.5f); |
22458 | Point5Pred.convert(Sem, APFloat::rmNearestTiesToEven, &Ignored); |
22459 | Point5Pred.next(true); |
22460 | |
22461 | SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT, |
22462 | DAG.getConstantFP(Point5Pred, dl, VT), N0); |
22463 | N0 = DAG.getNode(ISD::FADD, dl, VT, N0, Adder); |
22464 | |
22465 | |
22466 | return DAG.getNode(ISD::FTRUNC, dl, VT, N0); |
22467 | } |
22468 | |
22469 | |
22470 | |
/// Lower FABS and FNEG as a bitwise logic op with a sign-bit mask:
/// FABS = FAND with ~signbit, FNEG = FXOR with signbit, and the folded
/// FNEG(FABS(x)) = FOR with signbit. The two lowerings differ only in the
/// mask constant and the logic opcode.
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
  assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
         "Wrong opcode for lowering FABS or FNEG.");

  bool IsFABS = (Op.getOpcode() == ISD::FABS);

  // If this FABS has an FNEG user, bail out so the FNEG lowering can fold
  // the pair into a single FNABS (FOR with the sign mask) below.
  if (IsFABS)
    for (SDNode *User : Op->uses())
      if (User->getOpcode() == ISD::FNEG)
        return Op;

  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  bool IsF128 = (VT == MVT::f128);
  assert(VT.isFloatingPoint() && VT != MVT::f80 &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Unexpected type in LowerFABSorFNEG");

  // Scalar f16/f32/f64 values (other than f128) are processed as a 128-bit
  // vector of the matching element type, since the FP logic ops are vector
  // operations; the scalar result is extracted from lane 0 at the end.
  bool IsFakeVector = !VT.isVector() && !IsF128;
  MVT LogicVT = VT;
  if (IsFakeVector)
    LogicVT = (VT == MVT::f64)   ? MVT::v2f64
              : (VT == MVT::f32) ? MVT::v4f32
                                 : MVT::v8f16;

  unsigned EltBits = VT.getScalarSizeInBits();

  // FABS clears the sign bit (mask = all bits but the sign); FNEG/FNABS
  // operate on the sign bit itself (mask = just the sign bit).
  APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
                           APInt::getSignMask(EltBits);
  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
  SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);

  SDValue Op0 = Op.getOperand(0);
  // FNEG(FABS(x)) folds to a single FOR with the sign mask (FNABS).
  bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
  unsigned LogicOp = IsFABS  ? X86ISD::FAND :
                     IsFNABS ? X86ISD::FOR  :
                               X86ISD::FXOR;
  // For FNABS, operate on the FABS's input directly.
  SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;

  if (VT.isVector() || IsF128)
    return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);

  // Scalar: widen to the fake vector type, apply the logic op, and extract
  // the scalar result from lane 0.
  Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
  SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
                     DAG.getIntPtrConstant(0, dl));
}
22531 | |
/// Lower ISD::FCOPYSIGN as bit logic:
///   result = (Mag & ~signbit) | (Sign & signbit)
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  SDLoc dl(Op);

  // The sign operand may have a different FP type; convert it to match the
  // magnitude's type first.
  MVT VT = Op.getSimpleValueType();
  if (Sign.getSimpleValueType().bitsLT(VT))
    Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);

  if (Sign.getSimpleValueType().bitsGT(VT))
    Sign =
        DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(0, dl));

  bool IsF128 = (VT == MVT::f128);
  assert(VT.isFloatingPoint() && VT != MVT::f80 &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Unexpected type in LowerFCOPYSIGN");

  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);

  // Scalar f16/f32/f64 are processed in a 128-bit "fake vector" type so the
  // vector FP logic ops can be used; f128 and real vectors keep their type.
  bool IsFakeVector = !VT.isVector() && !IsF128;
  MVT LogicVT = VT;
  if (IsFakeVector)
    LogicVT = (VT == MVT::f64) ? MVT::v2f64
                               : (VT == MVT::f32) ? MVT::v4f32
                                                  : MVT::v8f16;

  // SignMask = just the sign bit; MagMask = everything but the sign bit.
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue SignMask = DAG.getConstantFP(
      APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
  SDValue MagMask = DAG.getConstantFP(
      APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT);

  // Isolate the sign bit of the Sign operand.
  if (IsFakeVector)
    Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
  SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);

  // Clear the sign bit of the magnitude. If Mag is a constant (splat), fold
  // the clear at compile time instead of emitting an FAND.
  SDValue MagBits;
  if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) {
    APFloat APF = Op0CN->getValueAPF();
    APF.clearSign();
    MagBits = DAG.getConstantFP(APF, dl, LogicVT);
  } else {
    // Non-constant magnitude: mask off its sign bit with FAND.
    if (IsFakeVector)
      Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
    MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
  }

  // Combine magnitude and sign; extract lane 0 for the fake-vector case.
  SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
  return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
                                          DAG.getIntPtrConstant(0, dl));
}
22600 | |
22601 | static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { |
22602 | SDValue N0 = Op.getOperand(0); |
22603 | SDLoc dl(Op); |
22604 | MVT VT = Op.getSimpleValueType(); |
22605 | |
22606 | MVT OpVT = N0.getSimpleValueType(); |
22607 | assert((OpVT == MVT::f32 || OpVT == MVT::f64) && |
22608 | "Unexpected type for FGETSIGN"); |
22609 | |
22610 | |
22611 | MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64); |
22612 | SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0); |
22613 | Res = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, Res); |
22614 | Res = DAG.getZExtOrTrunc(Res, dl, VT); |
22615 | Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT)); |
22616 | return Res; |
22617 | } |
22618 | |
22619 | |
22620 | static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl, |
22621 | SelectionDAG &DAG) { |
22622 | return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, |
22623 | DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS); |
22624 | } |
22625 | |
22626 | |
22627 | |
22628 | |
22629 | |
/// Match a tree of scalar BinOp nodes whose leaves are all constant-index
/// EXTRACT_VECTOR_ELTs, i.e. a scalarized horizontal reduction. On success,
/// the distinct source vectors are appended to \p SrcOps. If \p SrcMask is
/// null, every element of every source must be used exactly once; otherwise
/// the per-source element-usage bitmaps are returned in \p SrcMask (matching
/// SrcOps order) and partial coverage is accepted.
static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
                                 SmallVectorImpl<SDValue> &SrcOps,
                                 SmallVectorImpl<APInt> *SrcMask = nullptr) {
  // Worklist of nodes still to classify, and a map from each source vector
  // to the set of its elements seen so far.
  SmallVector<SDValue, 8> Opnds;
  DenseMap<SDValue, APInt> SrcOpMap;
  EVT VT = MVT::Other;

  // Seed the worklist with the two operands of the root BinOp.
  assert(Op.getOpcode() == unsigned(BinOp) &&
         "Unexpected bit reduction opcode");
  Opnds.push_back(Op.getOperand(0));
  Opnds.push_back(Op.getOperand(1));

  // Iterate by index (not iterator) because the vector grows as we walk it.
  for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
    SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
    // Interior BinOp nodes: expand into their operands and keep going.
    if (I->getOpcode() == unsigned(BinOp)) {
      Opnds.push_back(I->getOperand(0));
      Opnds.push_back(I->getOperand(1));
      // Re-evaluate the end bound since we just appended two entries.
      e += 2;
      continue;
    }

    // Leaves must be element extracts...
    if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    // ...with a constant index.
    auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1));
    if (!Idx)
      return false;

    SDValue Src = I->getOperand(0);
    DenseMap<SDValue, APInt>::iterator M = SrcOpMap.find(Src);
    if (M == SrcOpMap.end()) {
      VT = Src.getValueType();
      // All source vectors must share one value type.
      if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType())
        return false;
      unsigned NumElts = VT.getVectorNumElements();
      APInt EltCount = APInt::getNullValue(NumElts);
      M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first;
      SrcOps.push_back(Src);
    }

    // Each element may be extracted at most once.
    unsigned CIdx = Idx->getZExtValue();
    if (M->second[CIdx])
      return false;
    M->second.setBit(CIdx);
  }

  if (SrcMask) {
    // Caller wants the per-source usage bitmaps (partial coverage is OK).
    for (SDValue &SrcOp : SrcOps)
      SrcMask->push_back(SrcOpMap[SrcOp]);
  } else {
    // No mask requested: require that every element of every source is used.
    for (const auto &I : SrcOpMap)
      if (!I.second.isAllOnesValue())
        return false;
  }

  return true;
}
22697 | |
22698 | |
/// Emit a flags-producing comparison that tests whether (V & Mask) is all
/// zero, for use with an EQ/NE condition. \p Mask is a per-element mask
/// (all-ones means test the whole vector). Returns the node whose EFLAGS
/// result should be consumed with \p X86CC, or SDValue() on failure.
static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
                                  const APInt &Mask,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG, X86::CondCode &X86CC) {
  EVT VT = V.getValueType();
  unsigned ScalarSize = VT.getScalarSizeInBits();
  // The element mask must be sized for the element type (bool vectors may
  // legitimately disagree; just bail).
  if (Mask.getBitWidth() != ScalarSize) {
    assert(ScalarSize == 1 && "Element Mask vs Vector bitwidth mismatch");
    return SDValue();
  }

  assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
  X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);

  // Apply the element mask to a source value, skipping the AND when the mask
  // covers everything.
  auto MaskBits = [&](SDValue Src) {
    if (Mask.isAllOnesValue())
      return Src;
    EVT SrcVT = Src.getValueType();
    SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT);
    return DAG.getNode(ISD::AND, DL, SrcVT, Src, MaskValue);
  };

  // Sub-128-bit vectors: bitcast to a legal scalar integer and CMP with 0.
  if (VT.getSizeInBits() < 128) {
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!DAG.getTargetLoweringInfo().isTypeLegal(IntVT))
      return SDValue();
    return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
                       DAG.getBitcast(IntVT, MaskBits(V)),
                       DAG.getConstant(0, DL, IntVT));
  }

  // Only handle power-of-2 widths so the splitting loop below terminates at
  // a legal vector size.
  if (!isPowerOf2_32(VT.getSizeInBits()))
    return SDValue();

  // OR the halves together until the vector fits in a single test
  // (256 bits with AVX, else 128): zero is preserved across the ORs.
  unsigned TestSize = Subtarget.hasAVX() ? 256 : 128;
  while (VT.getSizeInBits() > TestSize) {
    auto Split = DAG.SplitVector(V, DL);
    VT = Split.first.getValueType();
    V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
  }

  // With SSE4.1, PTEST(V, V) sets ZF iff V is all-zero.
  bool UsePTEST = Subtarget.hasSSE41();
  if (UsePTEST) {
    MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
    V = DAG.getBitcast(TestVT, MaskBits(V));
    return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V);
  }

  // Pre-SSE4.1 fallback: compare bytes against zero and check the movmsk.
  // The v16i8 bitcast below would mangle a per-element mask wider than i32,
  // so reject that combination.
  if (!Mask.isAllOnesValue() && VT.getScalarSizeInBits() > 32)
    return SDValue();

  V = DAG.getBitcast(MVT::v16i8, MaskBits(V));
  V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V,
                  getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
  V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
  // All 16 bytes equal zero <=> movmsk == 0xFFFF.
  return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V,
                     DAG.getConstant(0xFFFF, DL, MVT::i32));
}
22762 | |
22763 | |
22764 | |
/// Try to recognize Op as a scalarized "is this vector all zero?" test (an
/// OR-reduction of vector element extracts, or a vector OR reduction being
/// extracted) and lower it via LowerVectorAllZero. On success returns the
/// flags-producing node and sets \p X86CC to the condition to test.
static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
                                      const SDLoc &DL,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG, SDValue &X86CC) {
  assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");

  if (!Subtarget.hasSSE2() || !Op->hasOneUse())
    return SDValue();

  // Peel a masking TRUNCATE or AND-with-constant off the compared value,
  // remembering the implied per-element bit mask.
  APInt Mask = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
  switch (Op.getOpcode()) {
  case ISD::TRUNCATE: {
    // trunc(X) == 0 is equivalent to (X & low-bits) == 0.
    SDValue Src = Op.getOperand(0);
    Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
                                Op.getScalarValueSizeInBits());
    Op = Src;
    break;
  }
  case ISD::AND: {
    // (X & C) == 0: carry C as the mask.
    if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Cst->getAPIntValue();
      Op = Op.getOperand(0);
    }
    break;
  }
  }

  // Case 1: a scalar OR tree over element extracts of one or more vectors.
  SmallVector<SDValue, 8> VecIns;
  if (Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) {
    EVT VT = VecIns[0].getValueType();
    assert(llvm::all_of(VecIns,
                        [VT](SDValue V) { return VT == V.getValueType(); }) &&
           "Reduction source vector mismatch");

    // Only sensible for 128+ bit, power-of-2-sized vectors.
    if (VT.getSizeInBits() < 128 || !isPowerOf2_32(VT.getSizeInBits()))
      return SDValue();

    // Pairwise-OR all source vectors together (appending intermediate
    // results) until one combined vector remains; zero is preserved.
    for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
         Slot += 2, e += 1) {
      // Note: e grows by one per iteration to cover the appended node.
      SDValue LHS = VecIns[Slot];
      SDValue RHS = VecIns[Slot + 1];
      VecIns.push_back(DAG.getNode(ISD::OR, DL, VT, LHS, RHS));
    }

    X86::CondCode CCode;
    if (SDValue V = LowerVectorAllZero(DL, VecIns.back(), CC, Mask, Subtarget,
                                       DAG, CCode)) {
      X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
      return V;
    }
  }

  // Case 2: an extract of a recognized in-vector OR reduction.
  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ISD::NodeType BinOp;
    if (SDValue Match =
            DAG.matchBinOpReduction(Op.getNode(), BinOp, {ISD::OR})) {
      X86::CondCode CCode;
      if (SDValue V =
              LowerVectorAllZero(DL, Match, CC, Mask, Subtarget, DAG, CCode)) {
        X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
        return V;
      }
    }
  }

  return SDValue();
}
22838 | |
22839 | |
22840 | static bool hasNonFlagsUse(SDValue Op) { |
22841 | for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE; |
22842 | ++UI) { |
22843 | SDNode *User = *UI; |
22844 | unsigned UOpNo = UI.getOperandNo(); |
22845 | if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { |
22846 | |
22847 | UOpNo = User->use_begin().getOperandNo(); |
22848 | User = *User->use_begin(); |
22849 | } |
22850 | |
22851 | if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && |
22852 | !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) |
22853 | return true; |
22854 | } |
22855 | return false; |
22856 | } |
22857 | |
22858 | |
22859 | |
22860 | |
22861 | static bool isProfitableToUseFlagOp(SDValue Op) { |
22862 | for (SDNode *U : Op->uses()) |
22863 | if (U->getOpcode() != ISD::CopyToReg && |
22864 | U->getOpcode() != ISD::SETCC && |
22865 | U->getOpcode() != ISD::STORE) |
22866 | return false; |
22867 | |
22868 | return true; |
22869 | } |
22870 | |
22871 | |
22872 | |
/// Emit a node that sets EFLAGS for "Op compared against zero" under
/// condition \p X86CC. Either emits an explicit CMP(Op, 0), or, when
/// profitable and safe, converts Op into its flag-setting X86ISD ALU form
/// and returns its flags result.
static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
                        SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  // CMP and the ALU flag ops don't agree on CF/OF in all cases; figure out
  // which flags the condition actually reads.
  bool NeedCF = false;
  bool NeedOF = false;
  switch (X86CC) {
  default: break;
  case X86::COND_A: case X86::COND_AE:
  case X86::COND_B: case X86::COND_BE:
    NeedCF = true;
    break;
  case X86::COND_G: case X86::COND_GE:
  case X86::COND_L: case X86::COND_LE:
  case X86::COND_O: case X86::COND_NO: {
    // If the op is known not to signed-wrap, OF is known zero and we can
    // still reuse the op's flags; otherwise a signed condition needs a
    // real CMP to compute OF.
    switch (Op->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::SHL:
      if (Op.getNode()->getFlags().hasNoSignedWrap())
        break;
      LLVM_FALLTHROUGH;
    default:
      NeedOF = true;
      break;
    }
    break;
  }
  }

  // Flag reuse requires result 0 of the node and no dependence on CF/OF;
  // otherwise fall back to an explicit compare with zero.
  if (Op.getResNo() != 0 || NeedOF || NeedCF) {
    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, Op.getValueType()));
  }
  unsigned Opcode = 0;
  unsigned NumOperands = 0;

  SDValue ArithOp = Op;

  // See if we can turn Op into its EFLAGS-producing X86ISD counterpart.
  switch (ArithOp.getOpcode()) {
  case ISD::AND:
    // AND is special: if the result value is also used as data, selection
    // prefers TEST (which has no non-flag result), so keep the plain AND.
    if (!hasNonFlagsUse(Op))
      break;

    LLVM_FALLTHROUGH;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::OR:
  case ISD::XOR:
    if (!isProfitableToUseFlagOp(Op))
      break;

    // Map the generic opcode to the flag-setting X86 node.
    switch (ArithOp.getOpcode()) {
    default: llvm_unreachable("unexpected operator!");
    case ISD::ADD: Opcode = X86ISD::ADD; break;
    case ISD::SUB: Opcode = X86ISD::SUB; break;
    case ISD::XOR: Opcode = X86ISD::XOR; break;
    case ISD::AND: Opcode = X86ISD::AND; break;
    case ISD::OR: Opcode = X86ISD::OR; break;
    }

    NumOperands = 2;
    break;
  case X86ISD::ADD:
  case X86ISD::SUB:
  case X86ISD::OR:
  case X86ISD::XOR:
  case X86ISD::AND:
    // Already a flag-producing node: just use its flags result.
    return SDValue(Op.getNode(), 1);
  case ISD::SSUBO:
  case ISD::USUBO: {
    // Overflow-checked subtract: rewrite as X86ISD::SUB and take its flags.
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
    return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),
                       Op->getOperand(1)).getValue(1);
  }
  default:
    break;
  }

  if (Opcode == 0) {
    // Nothing matched: emit the explicit compare-with-zero.
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, Op.getValueType()));
  }
  // Rebuild Op as the flag-setting node, replace all uses of the original
  // value with the new one, and return the flags result.
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
  SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);

  SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), New);
  return SDValue(New.getNode(), 1);
}
22978 | |
22979 | |
22980 | |
/// Emit an EFLAGS-producing comparison of Op0 and Op1 for condition
/// \p X86CC, applying several width/form canonicalizations first.
static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                       const SDLoc &dl, SelectionDAG &DAG,
                       const X86Subtarget &Subtarget) {
  // Comparing against zero is a TEST; let EmitTest handle flag reuse.
  if (isNullConstant(Op1))
    return EmitTest(Op0, X86CC, dl, DAG, Subtarget);

  EVT CmpVT = Op0.getValueType();

  assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
          CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");

  // 16-bit compares with an immediate that doesn't fit i8 need an operand
  // size prefix; widening to i32 avoids it. Skip when optimizing for size,
  // and on Atom where the prefix is presumably not the bottleneck (TODO
  // confirm — this mirrors the isAtom() exception upstream).
  if (CmpVT == MVT::i16 && !Subtarget.isAtom() &&
      !DAG.getMachineFunction().getFunction().hasMinSize()) {
    ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
    ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);

    // Only bother if one side is a constant too wide for a sign-extended i8.
    if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
        (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
      unsigned ExtendOp =
          isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
        // For equality the extension kind is free to choose; prefer
        // sign-extend when a truncated operand is already sign-extended
        // enough, so the extend can fold away.
        if (Op0.getOpcode() == ISD::TRUNCATE) {
          SDValue In = Op0.getOperand(0);
          unsigned EffBits =
              In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
          if (EffBits <= 16)
            ExtendOp = ISD::SIGN_EXTEND;
        } else if (Op1.getOpcode() == ISD::TRUNCATE) {
          SDValue In = Op1.getOperand(0);
          unsigned EffBits =
              In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
          if (EffBits <= 16)
            ExtendOp = ISD::SIGN_EXTEND;
        }
      }

      CmpVT = MVT::i32;
      Op0 = DAG.getNode(ExtendOp, dl, CmpVT, Op0);
      Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
    }
  }

  // Shrink an unsigned i64 compare to i32 when the upper 32 bits of both
  // sides are provably zero (avoids a REX prefix / 64-bit immediate).
  if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
      Op0.hasOneUse() &&
      cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
      DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
    CmpVT = MVT::i32;
    Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
    Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
  }

  // (0 - X) ==/!= Y  <=>  flags of (X + Y): fold the negate into an ADD.
  if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) &&
      Op0.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
    SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
    SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(1), Op1);
    return Add.getValue(1);
  }

  // Symmetric case: X ==/!= (0 - Y).
  if (Op1.getOpcode() == ISD::SUB && isNullConstant(Op1.getOperand(0)) &&
      Op1.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
    SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
    SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0, Op1.getOperand(1));
    return Add.getValue(1);
  }

  // General case: a flag-producing SUB; return its flags result.
  SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
  SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
  return Sub.getValue(1);
}
23061 | |
23062 | |
23063 | bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { |
23064 | EVT VT = Op.getValueType(); |
23065 | |
23066 | |
23067 | if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op)) |
23068 | return false; |
23069 | |
23070 | if (VT.isVector()) |
23071 | return Subtarget.hasFastVectorFSQRT(); |
23072 | return Subtarget.hasFastScalarFSQRT(); |
23073 | } |
23074 | |
23075 | |
23076 | |
23077 | SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, |
23078 | SelectionDAG &DAG, int Enabled, |
23079 | int &RefinementSteps, |
23080 | bool &UseOneConstNR, |
23081 | bool Reciprocal) const { |
23082 | EVT VT = Op.getValueType(); |
23083 | |
23084 | |
23085 | |
23086 | |
23087 | |
23088 | |
23089 | |
23090 | |
23091 | |
23092 | if ((VT == MVT::f32 && Subtarget.hasSSE1()) || |
23093 | (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) || |
23094 | (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) || |
23095 | (VT == MVT::v8f32 && Subtarget.hasAVX()) || |
23096 | (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) { |
23097 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
23098 | RefinementSteps = 1; |
23099 | |
23100 | UseOneConstNR = false; |
23101 | |
23102 | unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT; |
23103 | return DAG.getNode(Opcode, SDLoc(Op), VT, Op); |
23104 | } |
23105 | return SDValue(); |
23106 | } |
23107 | |
23108 | |
23109 | |
23110 | SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG, |
23111 | int Enabled, |
23112 | int &RefinementSteps) const { |
23113 | EVT VT = Op.getValueType(); |
23114 | |
23115 | |
23116 | |
23117 | |
23118 | |
23119 | |
23120 | |
23121 | |
23122 | if ((VT == MVT::f32 && Subtarget.hasSSE1()) || |
23123 | (VT == MVT::v4f32 && Subtarget.hasSSE1()) || |
23124 | (VT == MVT::v8f32 && Subtarget.hasAVX()) || |
23125 | (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) { |
23126 | |
23127 | |
23128 | |
23129 | if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified) |
23130 | return SDValue(); |
23131 | |
23132 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
23133 | RefinementSteps = 1; |
23134 | |
23135 | |
23136 | unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP; |
23137 | return DAG.getNode(Opcode, SDLoc(Op), VT, Op); |
23138 | } |
23139 | return SDValue(); |
23140 | } |
23141 | |
23142 | |
23143 | |
23144 | |
23145 | |
23146 | |
23147 | |
/// Minimum number of divisions by the same divisor before the combiner will
/// replace them with multiplications by the (one-time-computed) reciprocal.
unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
  return 2;
}
23151 | |
/// Build an sdiv-by-power-of-2 sequence without the generic srl/sra fixup:
///   add = N0 < 0 ? N0 + (2^lg2 - 1) : N0   (via CMOV)
///   res = add >> lg2 (arith), negated if the divisor is negative.
SDValue
X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  // If real division is cheap on this target/function, keep the sdiv.
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N,0);

  assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
         "Unexpected divisor!");

  // The sequence below relies on a conditional move.
  if (!Subtarget.hasCMov())
    return SDValue();

  // Only for CMOV-able integer widths (i64 only on 64-bit targets).
  EVT VT = N->getValueType(0);

  if (VT != MVT::i16 && VT != MVT::i32 &&
      !(Subtarget.is64Bit() && VT == MVT::i64))
    return SDValue();

  unsigned Lg2 = Divisor.countTrailingZeros();

  // Division by +/-2 is handled well by the generic expansion; skip it.
  if (Lg2 == 1)
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // Bias negative dividends by (2^Lg2 - 1) so the arithmetic shift rounds
  // toward zero, as sdiv requires.
  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by the (absolute) power of two.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));

  // Positive divisor: done.
  if (Divisor.isNonNegative())
    return SRA;

  // Negative divisor: negate the quotient.
  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
23208 | |
23209 | |
23210 | |
/// Try to lower an (AND ...) used by an EQ/NE-zero comparison into an
/// X86ISD::BT (bit test). Recognizes (X & (1 << N)), ((X >> N) & 1) and
/// (X & Pow2Const) shapes. Returns the BT node and sets \p X86CC, or
/// SDValue() if no pattern matches.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SDValue &X86CC) {
  assert(And.getOpcode() == ISD::AND && "Expected AND node!");
  SDValue Op0 = And.getOperand(0);
  SDValue Op1 = And.getOperand(1);
  // Truncates don't change which bit is tested; look through them.
  if (Op0.getOpcode() == ISD::TRUNCATE)
    Op0 = Op0.getOperand(0);
  if (Op1.getOpcode() == ISD::TRUNCATE)
    Op1 = Op1.getOperand(0);

  SDValue Src, BitNo;
  // Canonicalize a SHL to the LHS.
  if (Op1.getOpcode() == ISD::SHL)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() == ISD::SHL) {
    if (isOneConstant(Op0.getOperand(0))) {
      // Matched X & (1 << BitNo). If the shift was wider than the AND
      // result, make sure no high bits of the shifted value survive that
      // the truncated AND would have dropped.
      unsigned BitWidth = Op0.getValueSizeInBits();
      unsigned AndBitWidth = And.getValueSizeInBits();
      if (BitWidth > AndBitWidth) {
        KnownBits Known = DAG.computeKnownBits(Op0);
        if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
          return SDValue();
      }
      Src = Op1;
      BitNo = Op0.getOperand(1);
    }
  } else if (Op1.getOpcode() == ISD::Constant) {
    ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
    uint64_t AndRHSVal = AndRHS->getZExtValue();
    SDValue AndLHS = Op0;

    if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
      // Matched (X >> BitNo) & 1.
      Src = AndLHS.getOperand(0);
      BitNo = AndLHS.getOperand(1);
    } else {
      // Matched X & Pow2Const. Only use BT when the constant would be
      // awkward as a TEST immediate: it doesn't fit 32 bits, or (when
      // optimizing for size) doesn't fit 8 bits.
      bool OptForSize = DAG.shouldOptForSize();
      if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
          isPowerOf2_64(AndRHSVal)) {
        Src = AndLHS;
        BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
                                Src.getValueType());
      }
    }
  }

  // No pattern matched.
  if (!Src.getNode())
    return SDValue();

  // BT operates on 32/64-bit registers; any-extend narrow sources (the
  // extra high bits are never indexed by BitNo for a valid input).
  if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
    Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);

  // Conversely, prefer the 32-bit form when the bit index is known to be
  // below 32 (avoids a REX prefix).
  if (Src.getValueType() == MVT::i64 &&
      DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
    Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);

  // BT needs both operands in the same type.
  if (Src.getValueType() != BitNo.getValueType())
    BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);

  // BT sets CF to the tested bit: ==0 maps to AE (CF clear), !=0 to B.
  X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
                                dl, MVT::i8);
  return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}
23289 | |
23290 | |
23291 | |
/// Translate an ISD FP condition code into the immediate used by SSE/AVX
/// CMPPS/CMPPD-style compares, swapping Op0/Op1 when the condition must be
/// commuted to be expressible. Also reports whether the condition always
/// uses the signaling compare form.
static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
                                   SDValue &Op1, bool &IsAlwaysSignaling) {
  unsigned SSECC;
  bool Swap = false;

  // Map each condition to a compare immediate; GT/GE/ULE/ULT have no direct
  // encoding and are obtained by swapping the operands of LT/LE/UGE/UGT.
  // Immediates 8 (EQ_UQ) and 12 (NEQ_OQ) require the AVX VCMP encodings;
  // callers are expected to handle that (assumption from the values used —
  // the dispatch is outside this function).
  switch (SetCCOpcode) {
  default: llvm_unreachable("Unexpected SETCC condition");
  case ISD::SETOEQ:
  case ISD::SETEQ: SSECC = 0; break;
  case ISD::SETOGT:
  case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETLT:
  case ISD::SETOLT: SSECC = 1; break;
  case ISD::SETOGE:
  case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETLE:
  case ISD::SETOLE: SSECC = 2; break;
  case ISD::SETUO: SSECC = 3; break;
  case ISD::SETUNE:
  case ISD::SETNE: SSECC = 4; break;
  case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETUGE: SSECC = 5; break;
  case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETUGT: SSECC = 6; break;
  case ISD::SETO: SSECC = 7; break;
  case ISD::SETUEQ: SSECC = 8; break;
  case ISD::SETONE: SSECC = 12; break;
  }
  if (Swap)
    std::swap(Op0, Op1);

  // Equality/unordered tests are quiet; the ordering relations always use
  // the signaling form.
  switch (SetCCOpcode) {
  default:
    IsAlwaysSignaling = true;
    break;
  case ISD::SETEQ:
  case ISD::SETOEQ:
  case ISD::SETUEQ:
  case ISD::SETNE:
  case ISD::SETONE:
  case ISD::SETUNE:
  case ISD::SETO:
  case ISD::SETUO:
    IsAlwaysSignaling = false;
    break;
  }

  return SSECC;
}
23350 | |
23351 | |
23352 | |
23353 | static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS, |
23354 | ISD::CondCode Cond, SelectionDAG &DAG, |
23355 | const SDLoc &dl) { |
23356 | assert(VT.isInteger() && VT == LHS.getValueType() && |
23357 | VT == RHS.getValueType() && "Unsupported VTs!"); |
23358 | |
23359 | SDValue CC = DAG.getCondCode(Cond); |
23360 | |
23361 | |
23362 | SDValue LHS1, LHS2; |
23363 | std::tie(LHS1, LHS2) = splitVector(LHS, DAG, dl); |
23364 | |
23365 | |
23366 | SDValue RHS1, RHS2; |
23367 | std::tie(RHS1, RHS2) = splitVector(RHS, DAG, dl); |
23368 | |
23369 | |
23370 | EVT LoVT, HiVT; |
23371 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
23372 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, |
23373 | DAG.getNode(ISD::SETCC, dl, LoVT, LHS1, RHS1, CC), |
23374 | DAG.getNode(ISD::SETCC, dl, HiVT, LHS2, RHS2, CC)); |
23375 | } |
23376 | |
23377 | static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { |
23378 | |
23379 | SDValue Op0 = Op.getOperand(0); |
23380 | SDValue Op1 = Op.getOperand(1); |
23381 | SDValue CC = Op.getOperand(2); |
23382 | MVT VT = Op.getSimpleValueType(); |
23383 | SDLoc dl(Op); |
23384 | |
23385 | assert(VT.getVectorElementType() == MVT::i1 && |
23386 | "Cannot set masked compare for this operation"); |
23387 | |
23388 | ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); |
23389 | |
23390 | |
23391 | if (SetCCOpcode == ISD::SETLT) { |
23392 | SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode); |
23393 | std::swap(Op0, Op1); |
23394 | } |
23395 | |
23396 | return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode); |
23397 | } |
23398 | |
23399 | |
23400 | |
23401 | |
23402 | |
23403 | static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) { |
23404 | auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode()); |
23405 | if (!BV) |
23406 | return SDValue(); |
23407 | |
23408 | MVT VT = V.getSimpleValueType(); |
23409 | MVT EltVT = VT.getVectorElementType(); |
23410 | unsigned NumElts = VT.getVectorNumElements(); |
23411 | SmallVector<SDValue, 8> NewVecC; |
23412 | SDLoc DL(V); |
23413 | for (unsigned i = 0; i < NumElts; ++i) { |
23414 | auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i)); |
23415 | if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT) |
23416 | return SDValue(); |
23417 | |
23418 | |
23419 | const APInt &EltC = Elt->getAPIntValue(); |
23420 | if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue())) |
23421 | return SDValue(); |
23422 | |
23423 | NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT)); |
23424 | } |
23425 | |
23426 | return DAG.getBuildVector(VT, DL, NewVecC); |
23427 | } |
23428 | |
23429 | |
23430 | |
23431 | |
23432 | |
/// Lower an unsigned vector compare of i8/i16 elements using unsigned
/// saturating subtraction:  a <=u b  <=>  usubsat(a, b) == 0.
/// Other unsigned relations are reduced to SETULE by swapping operands
/// and/or adjusting a constant RHS by one.
static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
                                    ISD::CondCode Cond, const SDLoc &dl,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  // PSUBUS only exists for byte and word elements.
  MVT VET = VT.getVectorElementType();
  if (VET != MVT::i8 && VET != MVT::i16)
    return SDValue();

  switch (Cond) {
  default:
    return SDValue();
  case ISD::SETULT: {
    // a <u C  <=>  a <=u (C - 1); requires a constant RHS with no zero
    // element (decrement must not wrap). Skipped on AVX, where the native
    // compares are presumably preferable (NOTE(review): heuristic — the
    // rationale isn't visible here; confirm against upstream comments).
    if (Subtarget.hasAVX())
      return SDValue();
    SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false);
    if (!ULEOp1)
      return SDValue();
    Op1 = ULEOp1;
    break;
  }
  case ISD::SETUGT: {
    // a >u C  <=>  (C + 1) <=u a; requires a constant RHS with no
    // all-ones element (increment must not wrap).
    SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true);
    if (!UGEOp1)
      return SDValue();
    Op1 = Op0;
    Op0 = UGEOp1;
    break;
  }
  // a >=u b  <=>  b <=u a.
  case ISD::SETUGE:
    std::swap(Op0, Op1);
    break;
  case ISD::SETULE:
    break;
  }

  // usubsat(a, b) == 0  <=>  a <=u b.
  SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
  return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
                     DAG.getConstant(0, dl, VT));
}
23485 | |
/// Lower a vector SETCC (including the strict FP variants STRICT_FSETCC /
/// STRICT_FSETCCS) to X86-specific comparison nodes.
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
                  Op.getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry a chain as operand 0, shifting the other operands.
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
  SDValue CC = Op.getOperand(IsStrict ? 3 : 2);
  MVT VT = Op->getSimpleValueType(0);
  ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
  bool isFP = Op1.getSimpleValueType().isFloatingPoint();
  SDLoc dl(Op);

  if (isFP) {
#ifndef NDEBUG
    MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
    assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64);
#endif

    bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
    SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();

    // Choose between the AVX-512 mask-result compare (CMPM) and the
    // vector-result compare (CMPP). Strict sub-512-bit CMPM additionally
    // requires VLX.
    unsigned Opc;
    if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
        (!IsStrict || Subtarget.hasVLX() ||
         Op0.getSimpleValueType().is512BitVector())) {
#ifndef NDEBUG
      unsigned Num = VT.getVectorNumElements();
      assert(Num <= 16 || (Num == 32 && EltVT == MVT::f16));
#endif
      Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
    } else {
      Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP;
      // The CMPP nodes produce a result of the operand's FP type, not the
      // requested integer type; switch VT and fix the result up afterwards.
      VT = Op0.getSimpleValueType();
    }

    SDValue Cmp;
    bool IsAlwaysSignaling;
    unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1, IsAlwaysSignaling);
    if (!Subtarget.hasAVX()) {
      // In SSE the predicate fixes the signaling behavior. If a quiet
      // compare was requested but the chosen predicate always signals,
      // we cannot lower the strict node here — give up.
      if (IsStrict && IsAlwaysSignaling && !IsSignaling)
        return SDValue();

      // Conversely, when a signaling compare was requested but the chosen
      // predicate is quiet, emit an extra compare with predicate 1 purely
      // to raise the FP exceptions a signaling compare would raise.
      if (IsStrict && !IsAlwaysSignaling && IsSignaling) {
        SDValue SignalCmp = DAG.getNode(
            Opc, dl, {VT, MVT::Other},
            {Chain, Op0, Op1, DAG.getTargetConstant(1, dl, MVT::i8)});
        // Propagate the original node's FP flags to the helper compare.
        SignalCmp->setFlags(Op->getFlags());
        Chain = SignalCmp.getValue(1);
      }

      // SSE has no predicates >= 8 (SETUEQ/SETONE); synthesize them from
      // two compares combined with OR/AND.
      if (SSECC >= 8) {
        unsigned CC0, CC1;
        unsigned CombineOpc;
        if (Cond == ISD::SETUEQ) {
          // UEQ = UNORD | EQ.
          CC0 = 3; // UNORD
          CC1 = 0; // EQ
          CombineOpc = X86ISD::FOR;
        } else {
          assert(Cond == ISD::SETONE);
          // ONE = ORD & NEQ.
          CC0 = 7; // ORD
          CC1 = 4; // NEQ
          CombineOpc = X86ISD::FAND;
        }

        SDValue Cmp0, Cmp1;
        if (IsStrict) {
          Cmp0 = DAG.getNode(
              Opc, dl, {VT, MVT::Other},
              {Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)});
          Cmp1 = DAG.getNode(
              Opc, dl, {VT, MVT::Other},
              {Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)});
          Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1),
                              Cmp1.getValue(1));
        } else {
          Cmp0 = DAG.getNode(
              Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8));
          Cmp1 = DAG.getNode(
              Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8));
        }
        Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
      } else {
        if (IsStrict) {
          Cmp = DAG.getNode(
              Opc, dl, {VT, MVT::Other},
              {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
          Chain = Cmp.getValue(1);
        } else
          Cmp = DAG.getNode(
              Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
      }
    } else {
      // AVX compares encode quiet/signaling in the predicate: bit 4 toggles
      // the signaling behavior relative to the base predicate.
      if (IsStrict) {
        SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4;
        Cmp = DAG.getNode(
            Opc, dl, {VT, MVT::Other},
            {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
        Chain = Cmp.getValue(1);
      } else
        Cmp = DAG.getNode(
            Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
    }

    if (VT.getFixedSizeInBits() >
        Op.getSimpleValueType().getFixedSizeInBits()) {
      // We emitted a wide vector compare but a narrower mask result was
      // requested: compare the (bitcast-to-integer) result against zero to
      // produce the mask.
      EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();
      Cmp = DAG.getBitcast(CastVT, Cmp);
      Cmp = DAG.getSetCC(dl, Op.getSimpleValueType(), Cmp,
                         DAG.getConstant(0, dl, CastVT), ISD::SETNE);
    } else {
      // Same size: just bitcast the FP compare result to the requested
      // integer vector type.
      Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
    }

    if (IsStrict)
      return DAG.getMergeValues({Cmp, Chain}, dl);

    return Cmp;
  }

  assert(!IsStrict && "Strict SETCC only handles FP operands.");

  MVT VTOp0 = Op0.getSimpleValueType();
  (void)VTOp0;
  assert(VTOp0 == Op1.getSimpleValueType() &&
         "Expected operands with same type!");
  assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
         "Invalid number of packed elements for source and destination!");

  // Without AVX-512 the result type must match the operand type; only the
  // AVX-512 mask compares can produce a narrower i1 result.
  assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
         "Value types for source and destination must be the same!");

  // AVX-512 mask result: use the dedicated mask-compare lowering.
  if (VT.getVectorElementType() == MVT::i1) {
    // i8/i16 element compares into a mask require BWI (per the assert);
    // 32/64-bit elements work on plain AVX-512.
    assert((VTOp0.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
           "Unexpected operand type");
    return LowerIntVSETCC_AVX512(Op, DAG);
  }

  // XOP provides VPCOM/VPCOMU, which encode every condition directly.
  if (VT.is128BitVector() && Subtarget.hasXOP()) {
    // Translate the ISD condition into the XOP compare-mode immediate.
    unsigned CmpMode = 0;
    switch (Cond) {
    default: llvm_unreachable("Unexpected SETCC condition");
    case ISD::SETULT:
    case ISD::SETLT: CmpMode = 0x00; break;
    case ISD::SETULE:
    case ISD::SETLE: CmpMode = 0x01; break;
    case ISD::SETUGT:
    case ISD::SETGT: CmpMode = 0x02; break;
    case ISD::SETUGE:
    case ISD::SETGE: CmpMode = 0x03; break;
    case ISD::SETEQ: CmpMode = 0x04; break;
    case ISD::SETNE: CmpMode = 0x05; break;
    }

    // Unsigned conditions use the unsigned form of the instruction.
    unsigned Opc =
        ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;

    return DAG.getNode(Opc, dl, VT, Op0, Op1,
                       DAG.getTargetConstant(CmpMode, dl, MVT::i8));
  }

  // (X & pow2-mask) != 0  -->  (X & pow2-mask) == pow2-mask, which the
  // PCMPEQ path below handles without the final NOT.
  if (Cond == ISD::SETNE && ISD::isBuildVectorAllZeros(Op1.getNode())) {
    SDValue BC0 = peekThroughBitcasts(Op0);
    if (BC0.getOpcode() == ISD::AND) {
      APInt UndefElts;
      SmallVector<APInt, 64> EltBits;
      if (getTargetConstantBitsFromNode(BC0.getOperand(1),
                                        VT.getScalarSizeInBits(), UndefElts,
                                        EltBits, false, false)) {
        // Only valid when every mask element is a single bit.
        if (llvm::all_of(EltBits, [](APInt &V) { return V.isPowerOf2(); })) {
          Cond = ISD::SETEQ;
          Op1 = DAG.getBitcast(VT, BC0.getOperand(1));
        }
      }
    }
  }

  // (X & C) == C with power-of-2 splat C: shift the tested bit into the
  // sign position, then arithmetic-shift to broadcast it across the lane.
  if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND &&
      Op0.getOperand(1) == Op1 && Op0.hasOneUse()) {
    ConstantSDNode *C1 = isConstOrConstSplat(Op1);
    if (C1 && C1->getAPIntValue().isPowerOf2()) {
      unsigned BitWidth = VT.getScalarSizeInBits();
      unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;

      SDValue Result = Op0.getOperand(0);
      Result = DAG.getNode(ISD::SHL, dl, VT, Result,
                           DAG.getConstant(ShiftAmt, dl, VT));
      Result = DAG.getNode(ISD::SRA, dl, VT, Result,
                           DAG.getConstant(BitWidth - 1, dl, VT));
      return Result;
    }
  }

  // Without AVX2, split 256-bit compares into two 128-bit halves.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);

  // 512-bit i8/i16 compares without BWI are likewise split.
  if (VT == MVT::v32i16 || VT == MVT::v64i8) {
    assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!");
    return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
  }

  // Rewrite SETNE against boundary splat constants into an equivalent
  // ordered compare that avoids an inversion:
  //   X != SMIN --> X > SMIN,  X != SMAX --> X < SMAX,
  //   X != 0 --> X > 0 when the sign bit of X is known zero.
  APInt ConstValue;
  if (Cond == ISD::SETNE &&
      ISD::isConstantSplatVector(Op1.getNode(), ConstValue)) {
    if (ConstValue.isMinSignedValue())
      Cond = ISD::SETGT;
    else if (ConstValue.isMaxSignedValue())
      Cond = ISD::SETLT;
    else if (ConstValue.isNullValue() && DAG.SignBitIsZero(Op0))
      Cond = ISD::SETGT;
  }

  // An unsigned compare needs sign-bit flipping (so signed PCMPGT gives the
  // unsigned ordering) unless both operands are known non-negative, in which
  // case signed and unsigned orderings agree.
  bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
                   !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));

  // Prefer unsigned min/max when legal: x <=u y <=> x == umin(x, y), etc.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (ISD::isUnsignedIntSetCC(Cond) &&
      (FlipSigns || ISD::isTrueWhenEqual(Cond)) &&
      TLI.isOperationLegal(ISD::UMIN, VT)) {
    // Adjust constant operands to turn strict compares into non-strict
    // ones, which avoids the final NOT below.
    if (Cond == ISD::SETUGT) {
      // X >u C --> X >=u (C+1).
      if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, true)) {
        Op1 = UGTOp1;
        Cond = ISD::SETUGE;
      }
    }
    if (Cond == ISD::SETULT) {
      // X <u C --> X <=u (C-1).
      if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, false)) {
        Op1 = ULTOp1;
        Cond = ISD::SETULE;
      }
    }
    bool Invert = false;
    unsigned Opc;
    switch (Cond) {
    default: llvm_unreachable("Unexpected condition code");
    case ISD::SETUGT: Invert = true; LLVM_FALLTHROUGH; // x >u y == !(x <=u y)
    case ISD::SETULE: Opc = ISD::UMIN; break;          // x <=u y == (x == umin(x, y))
    case ISD::SETULT: Invert = true; LLVM_FALLTHROUGH; // x <u y == !(x >=u y)
    case ISD::SETUGE: Opc = ISD::UMAX; break;          // x >=u y == (x == umax(x, y))
    }

    SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
    Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);

    // Invert the result for the strict compares.
    if (Invert)
      Result = DAG.getNOT(dl, Result, VT);

    return Result;
  }

  // Try the saturating-subtract lowering for unsigned i8/i16 compares.
  if (FlipSigns)
    if (SDValue V =
            LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
      return V;

  // SSE only has EQ and (signed) GT vector compares; every other condition
  // is built from those by swapping operands and/or inverting the result.
  unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
                                                            : X86ISD::PCMPGT;
  bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
              Cond == ISD::SETGE || Cond == ISD::SETUGE;
  bool Invert = Cond == ISD::SETNE ||
                (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));

  if (Swap)
    std::swap(Op0, Op1);

  // v2i64 needs care: 64-bit PCMPGT requires SSE4.2 and 64-bit PCMPEQ
  // requires SSE4.1; emulate them with v4i32 operations otherwise.
  if (VT == MVT::v2i64) {
    if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
      assert(Subtarget.hasSSE2() && "Don't know how to lower!");

      // 0 > X: only the high (sign-carrying) dword of each lane matters, so
      // compare as v4i32 and broadcast each lane's high-dword result.
      if (!FlipSigns && !Invert && ISD::isBuildVectorAllZeros(Op0.getNode())) {
        Op0 = DAG.getConstant(0, dl, MVT::v4i32);
        Op1 = DAG.getBitcast(MVT::v4i32, Op1);

        SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
        static const int MaskHi[] = { 1, 1, 3, 3 };
        SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

        return DAG.getBitcast(VT, Result);
      }

      // X > -1: same high-dword trick with an all-ones RHS.
      if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) {
        Op0 = DAG.getBitcast(MVT::v4i32, Op0);
        Op1 = DAG.getConstant(-1, dl, MVT::v4i32);

        SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
        static const int MaskHi[] = { 1, 1, 3, 3 };
        SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

        return DAG.getBitcast(VT, Result);
      }

      // General case: bias the operands so the lexicographic (hi, lo) dword
      // comparison below is correct. For unsigned compares flip the sign bit
      // of both dwords; for signed compares flip only the low dword's sign
      // bit (the low half must compare unsigned).
      SDValue SB;
      if (FlipSigns) {
        SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64);
      } else {
        SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64);
      }
      Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
      Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);

      // Operate on dwords from here on.
      Op0 = DAG.getBitcast(MVT::v4i32, Op0);
      Op1 = DAG.getBitcast(MVT::v4i32, Op1);

      // result = GT(hi) | (EQ(hi) & GT(lo)).
      SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
      SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);

      // Broadcast the high/low dword of each 64-bit lane.
      static const int MaskHi[] = { 1, 1, 3, 3 };
      static const int MaskLo[] = { 0, 0, 2, 2 };
      SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
      SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
      SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

      SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
      Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);

      if (Invert)
        Result = DAG.getNOT(dl, Result, MVT::v4i32);

      return DAG.getBitcast(VT, Result);
    }

    if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
      // No 64-bit PCMPEQ before SSE4.1: compare dwords and require both
      // halves of each lane to be equal.
      assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");

      Op0 = DAG.getBitcast(MVT::v4i32, Op0);
      Op1 = DAG.getBitcast(MVT::v4i32, Op1);

      SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);

      // AND each dword result with its partner dword (swapped within the
      // 64-bit lane) so a lane is all-ones only if both halves matched.
      static const int Mask[] = { 1, 0, 3, 2 };
      SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
      Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);

      if (Invert)
        Result = DAG.getNOT(dl, Result, MVT::v4i32);

      return DAG.getBitcast(VT, Result);
    }
  }

  // For unsigned compares, flip the sign bits of both operands so the signed
  // PCMPGT produces the unsigned ordering.
  if (FlipSigns) {
    MVT EltVT = VT.getVectorElementType();
    SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
                                 VT);
    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
  }

  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);

  // Invert the result for SETNE/true-when-equal conditions.
  if (Invert)
    Result = DAG.getNOT(dl, Result, VT);

  return Result;
}
23918 | |
23919 | |
/// Try to lower an equality compare of a vXi1 mask (seen through a bitcast
/// to integer) against 0 or all-ones into a KTEST/KORTEST flags node.
/// On success \p X86CC receives the X86 condition (as a target constant) to
/// test on the returned i32 flags value.
static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              const X86Subtarget &Subtarget,
                              SDValue &X86CC) {
  // Only equality comparisons are supported.
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return SDValue();

  // The scalar must come from a bitcast of a mask vector.
  if (Op0.getOpcode() != ISD::BITCAST)
    return SDValue();

  Op0 = Op0.getOperand(0);
  MVT VT = Op0.getSimpleValueType();
  // The mask width needs matching k-register support: v16i1 on base
  // AVX-512, v8i1 with DQI, v32i1/v64i1 with BWI.
  if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&
      !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
      !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
    return SDValue();

  X86::CondCode X86Cond;
  if (isNullConstant(Op1)) {
    // Mask == 0: KORTEST sets ZF when the OR of the mask is all-zeros.
    X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
  } else if (isAllOnesConstant(Op1)) {
    // Mask == all-ones: KORTEST sets CF when the result is all-ones.
    X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
  } else
    return SDValue();

  // When comparing against zero and the mask is an AND, KTEST can consume
  // the AND's operands directly (it tests LHS & RHS), saving the AND.
  // KTEST exists for 8/16-bit masks with DQI and 32/64-bit masks with BWI.
  bool KTestable = false;
  if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1))
    KTestable = true;
  if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))
    KTestable = true;
  if (!isNullConstant(Op1))
    KTestable = false;
  if (KTestable && Op0.getOpcode() == ISD::AND && Op0.hasOneUse()) {
    SDValue LHS = Op0.getOperand(0);
    SDValue RHS = Op0.getOperand(1);
    X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
    return DAG.getNode(X86ISD::KTEST, dl, MVT::i32, LHS, RHS);
  }

  // Otherwise emit KORTEST; a single-use OR can likewise donate its two
  // operands since KORTEST tests LHS | RHS.
  SDValue LHS = Op0;
  SDValue RHS = Op0;
  if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse()) {
    LHS = Op0.getOperand(0);
    RHS = Op0.getOperand(1);
  }

  X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
  return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}
23974 | |
23975 | |
23976 | |
/// Produce an EFLAGS-defining node for the scalar comparison (Op0 `CC` Op1),
/// trying a series of flag-friendly special cases before falling back to a
/// generic compare. On return, \p X86CC holds the X86 condition code (as a
/// target constant) to test against the returned flags value.
SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                                             ISD::CondCode CC, const SDLoc &dl,
                                             SelectionDAG &DAG,
                                             SDValue &X86CC) const {
  // (X & mask) ==/!= 0 with an equality condition: try to lower to a BT
  // (bit test) instruction.
  if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
      return BT;
  }

  // Equality compare against zero: try to match a vector all-zeros test
  // (see MatchVectorAllZeroTest).
  if (isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE))
    if (SDValue CmpZ =
            MatchVectorAllZeroTest(Op0, CC, dl, Subtarget, DAG, X86CC))
      return CmpZ;

  // Try to lower a mask-register compare to KTEST/KORTEST.
  if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
    return Test;

  // X ==/!= 0 or X ==/!= 1 where X is itself an X86ISD::SETCC: reuse the
  // inner setcc's flags directly, inverting the condition when needed.
  if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (Op0.getOpcode() == X86ISD::SETCC) {
      // Invert iff exactly one of {SETNE, compare-against-zero} holds.
      bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);

      X86CC = Op0.getOperand(0);
      if (Invert) {
        X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
        CCode = X86::GetOppositeBranchCondition(CCode);
        X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
      }

      // Operand 1 of the inner SETCC is its flags input.
      return Op0.getOperand(1);
    }
  }

  // (X + -1) ==/!= -1: reuse the carry flag produced by the add itself
  // instead of emitting a separate CMP. The add is re-emitted as a
  // flag-producing X86ISD::ADD and all users of the old value are rewired.
  if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD &&
      Op0.getOperand(1) == Op1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (isProfitableToUseFlagOp(Op0)) {
      SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);

      SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0),
                                Op0.getOperand(1));
      DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New);
      // X + -1 carries out exactly when X != 0, so EQ maps to AE (no carry).
      X86::CondCode CCode = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
      X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
      return SDValue(New.getNode(), 1);
    }
  }

  // Fallback: translate the condition and emit a plain compare.
  X86::CondCode CondCode =
      TranslateX86CC(CC, dl, false, Op0, Op1, DAG);
  assert(CondCode != X86::COND_INVALID && "Unexpected condition code!");

  SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget);
  X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
  return EFLAGS;
}
24046 | |
/// Lower a scalar SETCC (and the strict FP variants) to an X86ISD::SETCC of
/// an EFLAGS-producing node. Vector SETCCs are dispatched to LowerVSETCC.
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
                  Op.getOpcode() == ISD::STRICT_FSETCCS;
  MVT VT = Op->getSimpleValueType(0);

  if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);

  assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
  SDLoc dl(Op);
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();

  // f128 has no hardware compare: soften to a libcall, which rewrites
  // Op0/Op1/CC into an equivalent (usually integer) comparison.
  if (Op0.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1, Chain,
                        Op.getOpcode() == ISD::STRICT_FSETCCS);

    // Softening may produce the final boolean directly (Op1 cleared).
    if (!Op1.getNode()) {
      assert(Op0.getValueType() == Op.getValueType() &&
             "Unexpected setcc expansion!");
      if (IsStrict)
        return DAG.getMergeValues({Op0, Chain}, dl);
      return Op0;
    }
  }

  if (Op0.getSimpleValueType().isInteger()) {
    // Canonicalize X >/>u C to X >=/>=u C+1 when that makes the constant
    // cheaper to encode: the isSignedIntN(8)/isSignedIntN(32) guards only
    // allow the rewrite when C+1 still fits an imm32 and either C already
    // fit an imm8 or C+1 newly does.
    if (auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) {
      const APInt &Op1Val = Op1C->getAPIntValue();
      if (!Op1Val.isNullValue()) {
        // Skip the boundary values where C+1 would wrap.
        if ((CC == ISD::CondCode::SETGT && !Op1Val.isMaxSignedValue()) ||
            (CC == ISD::CondCode::SETUGT && !Op1Val.isMaxValue())) {
          APInt Op1ValPlusOne = Op1Val + 1;
          if (Op1ValPlusOne.isSignedIntN(32) &&
              (!Op1Val.isSignedIntN(8) || Op1ValPlusOne.isSignedIntN(8))) {
            Op1 = DAG.getConstant(Op1ValPlusOne, dl, Op0.getValueType());
            CC = CC == ISD::CondCode::SETGT ? ISD::CondCode::SETGE
                                            : ISD::CondCode::SETUGE;
          }
        }
      }
    }

    SDValue X86CC;
    SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
    SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
    return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
  }

  // Scalar FP path: translate the condition (may fail for conditions that
  // need two compares) and emit an FP compare that defines EFLAGS.
  X86::CondCode CondCode = TranslateX86CC(CC, dl, true, Op0, Op1, DAG);
  if (CondCode == X86::COND_INVALID)
    return SDValue();

  SDValue EFLAGS;
  if (IsStrict) {
    // Strict nodes thread the chain and pick the signaling/quiet compare.
    bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
    EFLAGS =
        DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS : X86ISD::STRICT_FCMP,
                    dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
    Chain = EFLAGS.getValue(1);
  } else {
    EFLAGS = DAG.getNode(X86ISD::FCMP, dl, MVT::i32, Op0, Op1);
  }

  SDValue X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
  SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
  return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
24133 | |
24134 | SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const { |
24135 | SDValue LHS = Op.getOperand(0); |
24136 | SDValue RHS = Op.getOperand(1); |
24137 | SDValue Carry = Op.getOperand(2); |
24138 | SDValue Cond = Op.getOperand(3); |
24139 | SDLoc DL(Op); |
24140 | |
24141 | assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only."); |
24142 | X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get()); |
24143 | |
24144 | |
24145 | EVT CarryVT = Carry.getValueType(); |
24146 | Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), |
24147 | Carry, DAG.getAllOnesConstant(DL, CarryVT)); |
24148 | |
24149 | SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); |
24150 | SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1)); |
24151 | return getSETCC(CC, Cmp.getValue(1), DL, DAG); |
24152 | } |
24153 | |
24154 | |
24155 | |
24156 | |
24157 | |
24158 | static std::pair<SDValue, SDValue> |
24159 | getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) { |
24160 | assert(Op.getResNo() == 0 && "Unexpected result number!"); |
24161 | SDValue Value, Overflow; |
24162 | SDValue LHS = Op.getOperand(0); |
24163 | SDValue RHS = Op.getOperand(1); |
24164 | unsigned BaseOp = 0; |
24165 | SDLoc DL(Op); |
24166 | switch (Op.getOpcode()) { |
24167 | default: llvm_unreachable("Unknown ovf instruction!"); |
24168 | case ISD::SADDO: |
24169 | BaseOp = X86ISD::ADD; |
24170 | Cond = X86::COND_O; |
24171 | break; |
24172 | case ISD::UADDO: |
24173 | BaseOp = X86ISD::ADD; |
24174 | Cond = isOneConstant(RHS) ? X86::COND_E : X86::COND_B; |
24175 | break; |
24176 | case ISD::SSUBO: |
24177 | BaseOp = X86ISD::SUB; |
24178 | Cond = X86::COND_O; |
24179 | break; |
24180 | case ISD::USUBO: |
24181 | BaseOp = X86ISD::SUB; |
24182 | Cond = X86::COND_B; |
24183 | break; |
24184 | case ISD::SMULO: |
24185 | BaseOp = X86ISD::SMUL; |
24186 | Cond = X86::COND_O; |
24187 | break; |
24188 | case ISD::UMULO: |
24189 | BaseOp = X86ISD::UMUL; |
24190 | Cond = X86::COND_O; |
24191 | break; |
24192 | } |
24193 | |
24194 | if (BaseOp) { |
24195 | |
24196 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
24197 | Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); |
24198 | Overflow = Value.getValue(1); |
24199 | } |
24200 | |
24201 | return std::make_pair(Value, Overflow); |
24202 | } |
24203 | |
24204 | static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { |
24205 | |
24206 | |
24207 | |
24208 | |
24209 | SDLoc DL(Op); |
24210 | X86::CondCode Cond; |
24211 | SDValue Value, Overflow; |
24212 | std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG); |
24213 | |
24214 | SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG); |
24215 | assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!"); |
24216 | return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC); |
24217 | } |
24218 | |
24219 | |
24220 | static bool isX86LogicalCmp(SDValue Op) { |
24221 | unsigned Opc = Op.getOpcode(); |
24222 | if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI || |
24223 | Opc == X86ISD::FCMP) |
24224 | return true; |
24225 | if (Op.getResNo() == 1 && |
24226 | (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC || |
24227 | Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || |
24228 | Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND)) |
24229 | return true; |
24230 | |
24231 | return false; |
24232 | } |
24233 | |
24234 | static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { |
24235 | if (V.getOpcode() != ISD::TRUNCATE) |
24236 | return false; |
24237 | |
24238 | SDValue VOp0 = V.getOperand(0); |
24239 | unsigned InBits = VOp0.getValueSizeInBits(); |
24240 | unsigned Bits = V.getValueSizeInBits(); |
24241 | return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); |
24242 | } |
24243 | |
24244 | SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
24245 | bool AddTest = true; |
24246 | SDValue Cond = Op.getOperand(0); |
24247 | SDValue Op1 = Op.getOperand(1); |
24248 | SDValue Op2 = Op.getOperand(2); |
24249 | SDLoc DL(Op); |
24250 | MVT VT = Op1.getSimpleValueType(); |
24251 | SDValue CC; |
24252 | |
24253 | |
24254 | |
24255 | |
24256 | if (Cond.getOpcode() == ISD::SETCC && isScalarFPTypeInSSEReg(VT) && |
24257 | VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { |
24258 | SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); |
24259 | bool IsAlwaysSignaling; |
24260 | unsigned SSECC = |
24261 | translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(), |
24262 | CondOp0, CondOp1, IsAlwaysSignaling); |
24263 | |
24264 | if (Subtarget.hasAVX512()) { |
24265 | SDValue Cmp = |
24266 | DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, |
24267 | DAG.getTargetConstant(SSECC, DL, MVT::i8)); |
24268 | assert(!VT.isVector() && "Not a scalar type?"); |
24269 | return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); |
24270 | } |
24271 | |
24272 | if (SSECC < 8 || Subtarget.hasAVX()) { |
24273 | SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, |
24274 | DAG.getTargetConstant(SSECC, DL, MVT::i8)); |
24275 | |
24276 | |
24277 | |
24278 | |
24279 | |
24280 | |
24281 | |
24282 | |
24283 | |
24284 | |
24285 | |
24286 | |
24287 | |
24288 | if (Subtarget.hasAVX() && !isNullFPConstant(Op1) && |
24289 | !isNullFPConstant(Op2)) { |
24290 | |
24291 | |
24292 | MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; |
24293 | SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); |
24294 | SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); |
24295 | SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); |
24296 | |
24297 | MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; |
24298 | VCmp = DAG.getBitcast(VCmpVT, VCmp); |
24299 | |
24300 | SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); |
24301 | |
24302 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, |
24303 | VSel, DAG.getIntPtrConstant(0, DL)); |
24304 | } |
24305 | SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); |
24306 | SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); |
24307 | return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And); |
24308 | } |
24309 | } |
24310 | |
24311 | |
24312 | if (isScalarFPTypeInSSEReg(VT) && Subtarget.hasAVX512()) { |
24313 | SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond); |
24314 | return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); |
24315 | } |
24316 | |
24317 | if (Cond.getOpcode() == ISD::SETCC) { |
24318 | if (SDValue NewCond = LowerSETCC(Cond, DAG)) { |
24319 | Cond = NewCond; |
24320 | |
24321 | |
24322 | |
24323 | Op1 = Op.getOperand(1); |
24324 | Op2 = Op.getOperand(2); |
24325 | } |
24326 | } |
24327 | |
24328 | |
24329 | |
24330 | |
24331 | |
24332 | |
24333 | |
24334 | if (Cond.getOpcode() == X86ISD::SETCC && |
24335 | Cond.getOperand(1).getOpcode() == X86ISD::CMP && |
24336 | isNullConstant(Cond.getOperand(1).getOperand(1))) { |
24337 | SDValue Cmp = Cond.getOperand(1); |
24338 | SDValue CmpOp0 = Cmp.getOperand(0); |
24339 | unsigned CondCode = Cond.getConstantOperandVal(0); |
24340 | |
24341 | |
24342 | |
24343 | |
24344 | |
24345 | |
24346 | auto MatchFFSMinus1 = [&](SDValue Op1, SDValue Op2) { |
24347 | return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() && |
24348 | Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2)); |
24349 | }; |
24350 | if (Subtarget.hasCMov() && (VT == MVT::i32 || VT == MVT::i64) && |
24351 | ((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) || |
24352 | (CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) { |
24353 | |
24354 | } else if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && |
24355 | (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { |
24356 | SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2; |
24357 | |
24358 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
24359 | SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); |
24360 | |
24361 | |
24362 | |
24363 | |
24364 | if (isNullConstant(Y) && |
24365 | (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { |
24366 | SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType()); |
24367 | SDValue Neg = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0); |
24368 | Zero = DAG.getConstant(0, DL, Op.getValueType()); |
24369 | return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Neg.getValue(1)); |
24370 | } |
24371 | |
24372 | Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs, |
24373 | CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); |
24374 | |
24375 | SDValue Zero = DAG.getConstant(0, DL, Op.getValueType()); |
24376 | SDValue Res = |
24377 | DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp.getValue(1)); |
24378 | |
24379 | if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) |
24380 | Res = DAG.getNOT(DL, Res, Res.getValueType()); |
24381 | |
24382 | return DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); |
24383 | } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E && |
24384 | Cmp.getOperand(0).getOpcode() == ISD::AND && |
24385 | isOneConstant(Cmp.getOperand(0).getOperand(1))) { |
24386 | SDValue Src1, Src2; |
24387 | |
24388 | |
24389 | |
24390 | auto isOrXorPattern = [&]() { |
24391 | if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) && |
24392 | (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) { |
24393 | Src1 = |
24394 | Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0); |
24395 | Src2 = Op1; |
24396 | return true; |
24397 | } |
24398 | return false; |
24399 | }; |
24400 | |
24401 | if (isOrXorPattern()) { |
24402 | SDValue Neg; |
24403 | unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits(); |
24404 | |
24405 | |
24406 | if (CmpSz > VT.getSizeInBits()) |
24407 | Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0); |
24408 | else if (CmpSz < VT.getSizeInBits()) |
24409 | Neg = DAG.getNode(ISD::AND, DL, VT, |
24410 | DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)), |
24411 | DAG.getConstant(1, DL, VT)); |
24412 | else |
24413 | Neg = CmpOp0; |
24414 | SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
24415 | Neg); |
24416 | SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); |
24417 | return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); |
24418 | } |
24419 | } |
24420 | } |
24421 | |
24422 | |
24423 | if (Cond.getOpcode() == ISD::AND && |
24424 | Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && |
24425 | isOneConstant(Cond.getOperand(1))) |
24426 | Cond = Cond.getOperand(0); |
24427 | |
24428 | |
24429 | |
24430 | unsigned CondOpcode = Cond.getOpcode(); |
24431 | if (CondOpcode == X86ISD::SETCC || |
24432 | CondOpcode == X86ISD::SETCC_CARRY) { |
24433 | CC = Cond.getOperand(0); |
24434 | |
24435 | SDValue Cmp = Cond.getOperand(1); |
24436 | bool IllegalFPCMov = false; |
24437 | if (VT.isFloatingPoint() && !VT.isVector() && |
24438 | !isScalarFPTypeInSSEReg(VT) && Subtarget.hasCMov()) |
24439 | IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue()); |
24440 | |
24441 | if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || |
24442 | Cmp.getOpcode() == X86ISD::BT) { |
24443 | Cond = Cmp; |
24444 | AddTest = false; |
24445 | } |
24446 | } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || |
24447 | CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || |
24448 | CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) { |
24449 | SDValue Value; |
24450 | X86::CondCode X86Cond; |
24451 | std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG); |
24452 | |
24453 | CC = DAG.getTargetConstant(X86Cond, DL, MVT::i8); |
24454 | AddTest = false; |
24455 | } |
24456 | |
24457 | if (AddTest) { |
24458 | |
24459 | if (isTruncWithZeroHighBitsInput(Cond, DAG)) |
24460 | Cond = Cond.getOperand(0); |
24461 | |
24462 | |
24463 | |
24464 | if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { |
24465 | SDValue BTCC; |
24466 | if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) { |
24467 | CC = BTCC; |
24468 | Cond = BT; |
24469 | AddTest = false; |
24470 | } |
24471 | } |
24472 | } |
24473 | |
24474 | if (AddTest) { |
24475 | CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8); |
24476 | Cond = EmitTest(Cond, X86::COND_NE, DL, DAG, Subtarget); |
24477 | } |
24478 | |
24479 | |
24480 | |
24481 | |
24482 | |
24483 | if (Cond.getOpcode() == X86ISD::SUB) { |
24484 | unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); |
24485 | |
24486 | if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && |
24487 | (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && |
24488 | (isNullConstant(Op1) || isNullConstant(Op2))) { |
24489 | SDValue Res = |
24490 | DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), |
24491 | DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cond); |
24492 | if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) |
24493 | return DAG.getNOT(DL, Res, Res.getValueType()); |
24494 | return Res; |
24495 | } |
24496 | } |
24497 | |
24498 | |
24499 | |
24500 | |
24501 | if (Op.getValueType() == MVT::i8 && |
24502 | Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { |
24503 | SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); |
24504 | if (T1.getValueType() == T2.getValueType() && |
24505 | |
24506 | T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ |
24507 | SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, |
24508 | CC, Cond); |
24509 | return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); |
24510 | } |
24511 | } |
24512 | |
24513 | |
24514 | |
24515 | |
24516 | |
24517 | |
24518 | |
24519 | if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) || |
24520 | (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && |
24521 | !MayFoldLoad(Op2))) { |
24522 | Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); |
24523 | Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); |
24524 | SDValue Ops[] = { Op2, Op1, CC, Cond }; |
24525 | SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops); |
24526 | return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); |
24527 | } |
24528 | |
24529 | |
24530 | |
24531 | SDValue Ops[] = { Op2, Op1, CC, Cond }; |
24532 | return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); |
24533 | } |
24534 | |
// Lower a [sign|zero|any]-extend whose source is a vXi1 mask vector
// (AVX512 k-register style input, per the assert below). The mask is
// extended either with the native extend node or with a vselect between
// all-ones and zero, possibly through a wider legal type that is then
// truncated/extracted back down to VT.
static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!");
  MVT VTElt = VT.getVectorElementType();
  SDLoc dl(Op);

  unsigned NumElts = VT.getVectorNumElements();

  // Without BWI we cannot extend a mask directly to i8/i16 elements;
  // extend to i32 elements instead and truncate afterwards.
  MVT ExtVT = VT;
  if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) {
    // 16 x i32 would need a 512-bit vector with DQ support; if that is
    // unavailable, split the v16i1 source and extend the halves.
    if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
      return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);

    ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
  }

  // Without VLX only 512-bit operations are available: widen the mask
  // (inserting it at element 0 of a larger undef mask) and operate at
  // 512 bits, extracting the low subvector at the end.
  MVT WideVT = ExtVT;
  if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) {
    NumElts *= 512 / ExtVT.getSizeInBits();
    InVT = MVT::getVectorVT(MVT::i1, NumElts);
    In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT),
                     In, DAG.getIntPtrConstant(0, dl));
    WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts);
  }

  SDValue V;
  MVT WideEltVT = WideVT.getVectorElementType();
  if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
      (Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
    // A direct mask extension is supported for these element sizes.
    V = DAG.getNode(Op.getOpcode(), dl, WideVT, In);
  } else {
    // Otherwise select between -1 and 0 per mask bit. NOTE: this emits
    // all-ones for the set lanes even for a zero/any extend of the
    // original op — presumably the truncate below makes this acceptable
    // for the types that reach here.
    SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
    SDValue Zero = DAG.getConstant(0, dl, WideVT);
    V = DAG.getSelect(dl, WideVT, In, NegOne, Zero);
  }

  // If we extended through i32 elements, truncate back to the requested
  // element type (still at the widened element count).
  if (VT != ExtVT) {
    WideVT = MVT::getVectorVT(VTElt, NumElts);
    V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);
  }

  // Extract the originally requested subvector if we widened above.
  if (WideVT != VT)
    V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V,
                    DAG.getIntPtrConstant(0, dl));

  return V;
}
24591 | |
24592 | static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, |
24593 | SelectionDAG &DAG) { |
24594 | SDValue In = Op->getOperand(0); |
24595 | MVT InVT = In.getSimpleValueType(); |
24596 | |
24597 | if (InVT.getVectorElementType() == MVT::i1) |
24598 | return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG); |
24599 | |
24600 | assert(Subtarget.hasAVX() && "Expected AVX support"); |
24601 | return LowerAVXExtend(Op, DAG, Subtarget); |
24602 | } |
24603 | |
24604 | |
24605 | |
24606 | |
24607 | |
// Lower SIGN/ZERO_EXTEND_VECTOR_INREG for the supported integer vector
// types, or return SDValue() to let the generic legalizer handle it.
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  SDValue In = Op->getOperand(0);
  MVT VT = Op->getSimpleValueType(0);
  MVT InVT = In.getSimpleValueType();

  MVT SVT = VT.getVectorElementType();
  MVT InSVT = InVT.getVectorElementType();
  assert(SVT.getFixedSizeInBits() > InSVT.getFixedSizeInBits());

  // Only handle i8/i16/i32 -> i16/i32/i64 extensions, and only for
  // vector widths the subtarget actually supports.
  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
    return SDValue();
  if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
    return SDValue();
  if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
      !(VT.is256BitVector() && Subtarget.hasAVX()) &&
      !(VT.is512BitVector() && Subtarget.hasAVX512()))
    return SDValue();

  SDLoc dl(Op);
  unsigned Opc = Op.getOpcode();
  unsigned NumElts = VT.getVectorNumElements();

  // Only the lower elements of the source participate in an "in-reg"
  // extend; drop unused upper input, keeping at least a 128-bit source.
  if (InVT.getSizeInBits() > 128) {
    int InSize = InSVT.getSizeInBits() * NumElts;
    In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
    InVT = In.getSimpleValueType();
  }

  // With AVX2+ the wide extends are legal/custom-handled, so just emit
  // the corresponding node.
  if (Subtarget.hasInt256()) {
    assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");

    // Same element count: the *_VECTOR_INREG node itself is usable.
    if (InVT.getVectorNumElements() != NumElts)
      return DAG.getNode(Op.getOpcode(), dl, VT, In);

    // Element counts match exactly, so a full-vector extend of the (by
    // now minimal) source expresses the same operation.
    unsigned ExtOpc =
        Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
                                             : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, VT, In);
  }

  // AVX1, 256-bit result: extend the low half directly, shuffle the
  // high source half down and extend it too, then concatenate.
  if (Subtarget.hasAVX()) {
    assert(VT.is256BitVector() && "256-bit vector expected");
    MVT HalfVT = VT.getHalfNumVectorElementsVT();
    int HalfNumElts = HalfVT.getVectorNumElements();

    unsigned NumSrcElts = InVT.getVectorNumElements();
    SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
    for (int i = 0; i != HalfNumElts; ++i)
      HiMask[i] = HalfNumElts + i;

    SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
    SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
    Hi = DAG.getNode(Opc, dl, HalfVT, Hi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
  }

  // SSE2 fallback: only sign extends reach here (zero extends of these
  // types are handled elsewhere).
  assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
  assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");

  SDValue Curr = In;
  SDValue SignExt = Curr;

  // Move each source element into the high part of its destination lane
  // via a shuffle, then arithmetic-shift right to replicate the sign bit
  // across the widened element.
  if (InVT != MVT::v4i32) {
    // For v2i64 results we first build sign-extended i32 lanes; the sign
    // words are synthesized separately below.
    MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;

    unsigned DestWidth = DestVT.getScalarSizeInBits();
    unsigned Scale = DestWidth / InSVT.getSizeInBits();

    unsigned InNumElts = InVT.getVectorNumElements();
    unsigned DestElts = DestVT.getVectorNumElements();

    // Place source element i in the top sub-position of destination
    // lane i (remaining sub-positions stay undef).
    SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
    for (unsigned i = 0; i != DestElts; ++i)
      Mask[i * Scale + (Scale - 1)] = i;

    Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
    Curr = DAG.getBitcast(DestVT, Curr);

    unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
    SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
                          DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
  }

  // v2i64: compute the sign words as (0 > Curr) and interleave them with
  // the low words to form the 64-bit sign-extended elements.
  if (VT == MVT::v2i64) {
    assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
    SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
    SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
    SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
    SignExt = DAG.getBitcast(VT, SignExt);
  }

  return SignExt;
}
24719 | |
// Lower ISD::SIGN_EXTEND for vector types that need custom handling.
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  SDLoc dl(Op);

  // vXi1 mask sources have their own path.
  if (InVT.getVectorElementType() == MVT::i1)
    return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);

  assert(VT.isVector() && InVT.isVector() && "Expected vector type");
  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
         "Expected same number of elements");
  assert((VT.getVectorElementType() == MVT::i16 ||
          VT.getVectorElementType() == MVT::i32 ||
          VT.getVectorElementType() == MVT::i64) &&
         "Unexpected element type");
  assert((InVT.getVectorElementType() == MVT::i8 ||
          InVT.getVectorElementType() == MVT::i16 ||
          InVT.getVectorElementType() == MVT::i32) &&
         "Unexpected element type");

  // v32i8 -> v32i16 without BWI: split into two 256-bit halves.
  if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
    assert(InVT == MVT::v32i8 && "Unexpected VT!");
    return splitVectorIntUnary(Op, DAG);
  }

  // With AVX2 the remaining cases are handled as-is.
  if (Subtarget.hasInt256())
    return Op;

  // Pre-AVX2: sign-extend the low half in-reg, shuffle the high source
  // elements down and extend those too, then concatenate the halves.
  MVT HalfVT = VT.getHalfNumVectorElementsVT();
  SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);

  // Shuffle mask moving elements NumElems/2.. to positions 0..; the
  // upper mask entries stay undef (-1) since only the low half is used.
  unsigned NumElems = InVT.getVectorNumElements();
  SmallVector<int,8> ShufMask(NumElems, -1);
  for (unsigned i = 0; i != NumElems/2; ++i)
    ShufMask[i] = i + NumElems/2;

  SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
  OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
24771 | |
24772 | |
// Split a 256/512-bit store of a concatenated value into two half-width
// stores at adjacent addresses. Returns SDValue() if the store cannot be
// split (volatile/atomic stores must be kept as a single access).
static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
  SDValue StoredVal = Store->getValue();
  assert((StoredVal.getValueType().is256BitVector() ||
          StoredVal.getValueType().is512BitVector()) &&
         "Expecting 256/512-bit op");

  // Only simple (non-volatile, non-atomic) stores may be split.
  if (!Store->isSimple())
    return SDValue();

  SDLoc DL(Store);
  SDValue Value0, Value1;
  std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL);
  unsigned HalfOffset = Value0.getValueType().getStoreSize();
  SDValue Ptr0 = Store->getBasePtr();
  SDValue Ptr1 =
      DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(HalfOffset), DL);
  // Both half-stores hang off the original chain (they are independent)
  // and are joined by a TokenFactor below.
  SDValue Ch0 =
      DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
                   Store->getOriginalAlign(),
                   Store->getMemOperand()->getFlags());
  SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
                             Store->getPointerInfo().getWithOffset(HalfOffset),
                             Store->getOriginalAlign(),
                             Store->getMemOperand()->getFlags());
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
}
24804 | |
24805 | |
24806 | |
24807 | static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, |
24808 | SelectionDAG &DAG) { |
24809 | SDValue StoredVal = Store->getValue(); |
24810 | assert(StoreVT.is128BitVector() && |
24811 | StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op"); |
24812 | StoredVal = DAG.getBitcast(StoreVT, StoredVal); |
24813 | |
24814 | |
24815 | |
24816 | |
24817 | if (!Store->isSimple()) |
24818 | return SDValue(); |
24819 | |
24820 | MVT StoreSVT = StoreVT.getScalarType(); |
24821 | unsigned NumElems = StoreVT.getVectorNumElements(); |
24822 | unsigned ScalarSize = StoreSVT.getStoreSize(); |
24823 | |
24824 | SDLoc DL(Store); |
24825 | SmallVector<SDValue, 4> Stores; |
24826 | for (unsigned i = 0; i != NumElems; ++i) { |
24827 | unsigned Offset = i * ScalarSize; |
24828 | SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(), |
24829 | TypeSize::Fixed(Offset), DL); |
24830 | SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal, |
24831 | DAG.getIntPtrConstant(i, DL)); |
24832 | SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr, |
24833 | Store->getPointerInfo().getWithOffset(Offset), |
24834 | Store->getOriginalAlign(), |
24835 | Store->getMemOperand()->getFlags()); |
24836 | Stores.push_back(Ch); |
24837 | } |
24838 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores); |
24839 | } |
24840 | |
// Custom-lower stores that the generic legalizer cannot handle directly:
// small vXi1 mask stores, wide vectors that should be split, and 64-bit
// vectors that are type-widened.
static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
  SDLoc dl(St);
  SDValue StoredVal = St->getValue();

  // vXi1 mask store (X <= 8 elements, AVX512F without DQI): widen the
  // mask to v16i1, bitcast to i16, truncate to i8 and store one byte.
  if (StoredVal.getValueType().isVector() &&
      StoredVal.getValueType().getVectorElementType() == MVT::i1) {
    unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
    assert(NumElts <= 8 && "Unexpected VT");
    assert(!St->isTruncatingStore() && "Expected non-truncating store");
    assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
           "Expected AVX512F without AVX512DQI");

    StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
                            DAG.getUNDEF(MVT::v16i1), StoredVal,
                            DAG.getIntPtrConstant(0, dl));
    StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
    StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);

    // The inserted upper mask bits are undef; clear them so the stored
    // byte has zeros above the real NumElts bits.
    if (NumElts < 8)
      StoredVal = DAG.getZeroExtendInReg(
          StoredVal, dl, EVT::getIntegerVT(*DAG.getContext(), NumElts));

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  if (St->isTruncatingStore())
    return SDValue();

  // 256-bit stores (and BWI-less v32i16/v64i8): split only when the
  // value is itself a concatenation, so each half-store is cheap;
  // otherwise leave the store alone.
  MVT StoreVT = StoredVal.getSimpleValueType();
  if (StoreVT.is256BitVector() ||
      ((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
       !Subtarget.hasBWI())) {
    SmallVector<SDValue, 4> CatOps;
    if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
      return splitVectorStore(St, DAG);
    return SDValue();
  }

  // Remaining case: a 64-bit vector whose type action is widening.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
         "Unexpected VT");
  assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
         TargetLowering::TypeWidenVector && "Unexpected type action!");

  // Widen to the 128-bit type with an undef upper half.
  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
  StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
                          DAG.getUNDEF(StoreVT));

  if (Subtarget.hasSSE2()) {
    // Extract the low 64 bits as a scalar and store that. Use i64 on
    // 64-bit integer targets, f64 otherwise.
    MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
    MVT CastVT = MVT::getVectorVT(StVT, 2);
    StoredVal = DAG.getBitcast(CastVT, StoredVal);
    StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
                            DAG.getIntPtrConstant(0, dl));

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }
  // SSE1 only: use the dedicated extract-and-store memory intrinsic node
  // to store the low 64 bits.
  assert(Subtarget.hasSSE1() && "Expected SSE");
  SDVTList Tys = DAG.getVTList(MVT::Other);
  SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
  return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
                                 St->getMemOperand());
}
24918 | |
24919 | |
24920 | |
24921 | |
24922 | |
24923 | |
24924 | |
24925 | |
// Custom-lower integer vector loads. Currently only small vXi1 mask
// loads (AVX512F without DQI) need handling: load the mask as one i8,
// widen it to v16i1 via a bitcast, and extract the requested subvector.
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  MVT RegVT = Op.getSimpleValueType();
  assert(RegVT.isVector() && "We only custom lower vector loads.");
  assert(RegVT.isInteger() &&
         "We only custom lower integer vector loads.");

  LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
  SDLoc dl(Ld);

  if (RegVT.getVectorElementType() == MVT::i1) {
    assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");
    assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
    assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
           "Expected AVX512F without AVX512DQI");

    // Replace the vector load with a scalar i8 load of the same memory,
    // preserving alignment and memory-operand flags.
    SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(),
                                Ld->getPointerInfo(), Ld->getOriginalAlign(),
                                Ld->getMemOperand()->getFlags());

    assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");

    // Widen to i16, reinterpret as v16i1, and keep the low elements.
    SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
    Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
                      DAG.getBitcast(MVT::v16i1, Val),
                      DAG.getIntPtrConstant(0, dl));
    // Return both the value and the new load's chain.
    return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
  }

  return SDValue();
}
24959 | |
24960 | |
24961 | |
24962 | static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { |
24963 | Opc = Op.getOpcode(); |
24964 | if (Opc != ISD::OR && Opc != ISD::AND) |
24965 | return false; |
24966 | return (Op.getOperand(0).getOpcode() == X86ISD::SETCC && |
24967 | Op.getOperand(0).hasOneUse() && |
24968 | Op.getOperand(1).getOpcode() == X86ISD::SETCC && |
24969 | Op.getOperand(1).hasOneUse()); |
24970 | } |
24971 | |
// Lower ISD::BRCOND to X86ISD::BRCOND nodes driven by EFLAGS.
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Dest = Op.getOperand(2);
  SDLoc dl(Op);

  // Branch on a SETCC condition directly (f128 compares fall through to
  // the generic path at the bottom).
  if (Cond.getOpcode() == ISD::SETCC &&
      Cond.getOperand(0).getValueType() != MVT::f128) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

    // Branch on the boolean result of an arithmetic-with-overflow node
    // compared against 0 or 1: reuse the flags the op already produces.
    if (ISD::isOverflowIntrOpRes(LHS) &&
        (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        (isNullConstant(RHS) || isOneConstant(RHS))) {
      SDValue Value, Overflow;
      X86::CondCode X86Cond;
      std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, LHS.getValue(0), DAG);

      // (res == 0) and (res != 1) test the *opposite* of the condition.
      if ((CC == ISD::SETEQ) == isNullConstant(RHS))
        X86Cond = X86::GetOppositeBranchCondition(X86Cond);

      SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Overflow);
    }

    // Integer compares: materialize EFLAGS plus a condition code.
    if (LHS.getSimpleValueType().isInteger()) {
      SDValue CCVal;
      SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, CC, SDLoc(Cond), DAG, CCVal);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         EFLAGS);
    }

    // Floating-point compares from here on. SETOEQ ("equal and ordered")
    // has no single x86 condition code.
    if (CC == ISD::SETOEQ) {
      // If this BRCOND's only user is the unconditional BR to the
      // fall-through block, invert the test: retarget the BR at Dest and
      // branch to the original false block on NE or P, sharing one FCMP.
      if (Op.getNode()->hasOneUse()) {
        SDNode *User = *Op.getNode()->use_begin();
        if (User->getOpcode() == ISD::BR) {
          SDValue FalseBB = User->getOperand(1);
          SDNode *NewBR =
              DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
          assert(NewBR == User);
          (void)NewBR;
          Dest = FalseBB;

          SDValue Cmp =
              DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
          SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
          Chain = DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest,
                              CCVal, Cmp);
          CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
          return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                             Cmp);
        }
      }
    } else if (CC == ISD::SETUNE) {
      // SETUNE ("not equal, or unordered"): branch on NE or P, reusing a
      // single FCMP for both conditional branches.
      SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
      SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
      Chain =
          DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp);
      CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Cmp);
    } else {
      // Every other FP condition maps to a single x86 condition code.
      X86::CondCode X86Cond =
          TranslateX86CC(CC, dl, /*IsFP=*/true, LHS, RHS, DAG);
      SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
      SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Cmp);
    }
  }

  // Branch directly on the overflow bit of an XALU op used as condition.
  if (ISD::isOverflowIntrOpRes(Cond)) {
    SDValue Value, Overflow;
    X86::CondCode X86Cond;
    std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);

    SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
    return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Overflow);
  }

  // Look through a truncate whose dropped high bits are known zero.
  if (isTruncWithZeroHighBitsInput(Cond, DAG))
    Cond = Cond.getOperand(0);

  EVT CondVT = Cond.getValueType();

  // Only the low bit of the condition matters; mask it off unless it is
  // already an AND with 1.
  if (!(Cond.getOpcode() == ISD::AND && isOneConstant(Cond.getOperand(1))))
    Cond =
        DAG.getNode(ISD::AND, dl, CondVT, Cond, DAG.getConstant(1, dl, CondVT));

  SDValue LHS = Cond;
  SDValue RHS = DAG.getConstant(0, dl, CondVT);

  // Generic path: branch on (Cond & 1) != 0.
  SDValue CCVal;
  SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETNE, dl, DAG, CCVal);
  return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                     EFLAGS);
}
25088 | |
25089 | |
25090 | |
25091 | |
25092 | |
25093 | |
// Lower ISD::DYNAMIC_STACKALLOC. Three strategies, chosen below:
//  * plain SP adjustment (no probing needed),
//  * segmented-stack allocation (SEG_ALLOCA),
//  * Windows-style chunked probing (WIN_ALLOCA / PROBED_ALLOCA).
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool SplitStack = MF.shouldSplitStack();
  bool EmitStackProbeCall = hasStackProbeSymbol(MF);
  // Windows (non-MachO), split stacks, or an explicit probe symbol all
  // require the non-trivial lowering paths below.
  bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
               SplitStack || EmitStackProbeCall;
  SDLoc dl(Op);

  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  // Operand 2 is the requested alignment (0 means "no extra alignment").
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Bracket the allocation in a (zero-sized) call sequence so later
  // passes treat it like an SP-adjusting region.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);

  bool Is64Bit = Subtarget.is64Bit();
  MVT SPTy = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (!Lower) {
    // Simple path: subtract Size from SP (or use the inline-probe
    // pseudo), align if requested, and write SP back.
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
    assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
                    " not tell us which reg is the stack pointer!");

    const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
    const Align StackAlign = TFI.getStackAlign();
    if (hasInlineStackProbe(MF)) {
      MachineRegisterInfo &MRI = MF.getRegInfo();

      // Feed Size through a virtual register into the probing pseudo.
      const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
      Register Vreg = MRI.createVirtualRegister(AddrRegClass);
      Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
      Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
                           DAG.getRegister(Vreg, SPTy));
    } else {
      SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
      Chain = SP.getValue(1);
      Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size);
    }
    // Over-align the result only when the request exceeds the natural
    // stack alignment.
    if (Alignment && *Alignment > StackAlign)
      Result =
          DAG.getNode(ISD::AND, dl, VT, Result,
                      DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
    Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result);
  } else if (SplitStack) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    if (Is64Bit) {
      // The 64-bit segmented-stack scheme cannot handle the nest
      // attribute (it needs that register for its own purposes).
      const Function &F = MF.getFunction();
      for (const auto &A : F.args()) {
        if (A.hasNestAttr())
          report_fatal_error("Cannot use segmented stacks with functions that "
                             "have nested arguments.");
      }
    }

    // Segmented stacks: hand the size to the SEG_ALLOCA pseudo.
    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
    Register Vreg = MRI.createVirtualRegister(AddrRegClass);
    Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
    Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                         DAG.getRegister(Vreg, SPTy));
  } else {
    // Windows probing path: WIN_ALLOCA adjusts SP itself; re-read SP
    // afterwards and optionally align it.
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
    MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);

    const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
    Register SPReg = RegInfo->getStackRegister();
    SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
    Chain = SP.getValue(1);

    if (Alignment) {
      SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                       DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
      Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
    }

    Result = SP;
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);

  // Return both the allocated pointer and the updated chain.
  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, dl);
}
25189 | |
// Lower ISD::VASTART by initializing the va_list object.
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto PtrVT = getPointerTy(MF.getDataLayout());
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // 32-bit targets and Win64 use a single pointer into the vararg frame
  // area as the whole va_list: just store that pointer.
  if (!Subtarget.is64Bit() ||
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv())) {
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // 64-bit System V: fill in the four-field __va_list_tag structure at
  // the pointer given in operand 1 (operand 0 is the incoming chain).
  SmallVector<SDValue, 8> MemOps;
  SDValue FIN = Op.getOperand(1);

  // gp_offset (offset 0, i32).
  SDValue Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV));
  MemOps.push_back(Store);

  // fp_offset (offset 4, i32).
  FIN = DAG.getMemBasePlusOffset(FIN, TypeSize::Fixed(4), DL);
  Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV, 4));
  MemOps.push_back(Store);

  // overflow_arg_area (offset 8): pointer to the on-stack varargs.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
  SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  Store =
      DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8));
  MemOps.push_back(Store);

  // reg_save_area (offset 16 for LP64, 12 for ILP32 / x32 pointers):
  // pointer to the register save area.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
      Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
  SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
  Store = DAG.getStore(
      Op.getOperand(0), DL, RSFIN, FIN,
      MachinePointerInfo(SV, Subtarget.isTarget64BitLP64() ? 16 : 12));
  MemOps.push_back(Store);
  // Join the independent field stores into a single chain.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
25246 | |
/// Lower ISD::VAARG for 64-bit System V targets by emitting a target-specific
/// X86ISD::VAARG_64/VAARG_X32 pseudo that computes the argument address, then
/// loading the value from it. Win64 (char* va_list) uses the generic
/// expansion instead.
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() &&
         "LowerVAARG only handles 64-bit va_arg!");
  assert(Op.getNumOperands() == 4);

  MachineFunction &MF = DAG.getMachineFunction();
  if (Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()))
    // The Win64 ABI uses char* instead of a structure.
    return DAG.expandVAArg(Op.getNode());

  SDValue Chain = Op.getOperand(0);
  SDValue SrcPtr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  unsigned Align = Op.getConstantOperandVal(3);
  SDLoc dl(Op);

  EVT ArgVT = Op.getNode()->getValueType(0);
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
  uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
  uint8_t ArgMode;

  // Decide which save area this value is read from: mode 2 selects the FP
  // (XMM) register save area, mode 1 the general-purpose one. This simple
  // classification only covers basic scalar types; f80 is rejected up front.
  assert(ArgVT != MVT::f80 && "va_arg for f80 not yet implemented");
  if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
  } else {
    assert(ArgVT.isInteger() && ArgSize <= 32 &&
           "Unhandled argument type in LowerVAARG");
    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
  }

  if (ArgMode == 2) {
    // Make sure using fp_offset makes sense: soft-float, noimplicitfloat or
    // missing SSE1 would make reading the XMM save area invalid.
    assert(!Subtarget.useSoftFloat() &&
           !(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) &&
           Subtarget.hasSSE1());
  }

  // Insert the VAARG pseudo into the DAG. It produces two values: the
  // address of the next argument, and the output chain. It both reads and
  // updates the va_list, hence MOLoad | MOStore.
  SDValue InstOps[] = {Chain, SrcPtr,
                       DAG.getTargetConstant(ArgSize, dl, MVT::i32),
                       DAG.getTargetConstant(ArgMode, dl, MVT::i8),
                       DAG.getTargetConstant(Align, dl, MVT::i32)};
  SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
  SDValue VAARG = DAG.getMemIntrinsicNode(
      Subtarget.isTarget64BitLP64() ? X86ISD::VAARG_64 : X86ISD::VAARG_X32, dl,
      VTs, InstOps, MVT::i64, MachinePointerInfo(SV),
      /*Alignment=*/None,
      MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
  Chain = VAARG.getValue(1);

  // Load the argument from the computed address and return it.
  return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo());
}
25304 | |
25305 | static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget, |
25306 | SelectionDAG &DAG) { |
25307 | |
25308 | |
25309 | assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); |
25310 | if (Subtarget.isCallingConvWin64( |
25311 | DAG.getMachineFunction().getFunction().getCallingConv())) |
25312 | |
25313 | return DAG.expandVACopy(Op.getNode()); |
25314 | |
25315 | SDValue Chain = Op.getOperand(0); |
25316 | SDValue DstPtr = Op.getOperand(1); |
25317 | SDValue SrcPtr = Op.getOperand(2); |
25318 | const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); |
25319 | const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); |
25320 | SDLoc DL(Op); |
25321 | |
25322 | return DAG.getMemcpy( |
25323 | Chain, DL, DstPtr, SrcPtr, |
25324 | DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 24 : 16, DL), |
25325 | Align(Subtarget.isTarget64BitLP64() ? 8 : 4), false, false, |
25326 | false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); |
25327 | } |
25328 | |
25329 | |
25330 | static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) { |
25331 | switch (Opc) { |
25332 | case ISD::SHL: |
25333 | case X86ISD::VSHL: |
25334 | case X86ISD::VSHLI: |
25335 | return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI; |
25336 | case ISD::SRL: |
25337 | case X86ISD::VSRL: |
25338 | case X86ISD::VSRLI: |
25339 | return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI; |
25340 | case ISD::SRA: |
25341 | case X86ISD::VSRA: |
25342 | case X86ISD::VSRAI: |
25343 | return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI; |
25344 | } |
25345 | llvm_unreachable("Unknown target vector shift node"); |
25346 | } |
25347 | |
25348 | |
25349 | |
25350 | static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, |
25351 | SDValue SrcOp, uint64_t ShiftAmt, |
25352 | SelectionDAG &DAG) { |
25353 | MVT ElementType = VT.getVectorElementType(); |
25354 | |
25355 | |
25356 | |
25357 | if (VT != SrcOp.getSimpleValueType()) |
25358 | SrcOp = DAG.getBitcast(VT, SrcOp); |
25359 | |
25360 | |
25361 | if (ShiftAmt == 0) |
25362 | return SrcOp; |
25363 | |
25364 | |
25365 | if (ShiftAmt >= ElementType.getSizeInBits()) { |
25366 | if (Opc == X86ISD::VSRAI) |
25367 | ShiftAmt = ElementType.getSizeInBits() - 1; |
25368 | else |
25369 | return DAG.getConstant(0, dl, VT); |
25370 | } |
25371 | |
25372 | assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) |
25373 | && "Unknown target vector shift-by-constant node"); |
25374 | |
25375 | |
25376 | |
25377 | if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { |
25378 | SmallVector<SDValue, 8> Elts; |
25379 | unsigned NumElts = SrcOp->getNumOperands(); |
25380 | |
25381 | switch (Opc) { |
25382 | default: llvm_unreachable("Unknown opcode!"); |
25383 | case X86ISD::VSHLI: |
25384 | for (unsigned i = 0; i != NumElts; ++i) { |
25385 | SDValue CurrentOp = SrcOp->getOperand(i); |
25386 | if (CurrentOp->isUndef()) { |
25387 | |
25388 | Elts.push_back(DAG.getConstant(0, dl, ElementType)); |
25389 | continue; |
25390 | } |
25391 | auto *ND = cast<ConstantSDNode>(CurrentOp); |
25392 | const APInt &C = ND->getAPIntValue(); |
25393 | Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType)); |
25394 | } |
25395 | break; |
25396 | case X86ISD::VSRLI: |
25397 | for (unsigned i = 0; i != NumElts; ++i) { |
25398 | SDValue CurrentOp = SrcOp->getOperand(i); |
25399 | if (CurrentOp->isUndef()) { |
25400 | |
25401 | Elts.push_back(DAG.getConstant(0, dl, ElementType)); |
25402 | continue; |
25403 | } |
25404 | auto *ND = cast<ConstantSDNode>(CurrentOp); |
25405 | const APInt &C = ND->getAPIntValue(); |
25406 | Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType)); |
25407 | } |
25408 | break; |
25409 | case X86ISD::VSRAI: |
25410 | for (unsigned i = 0; i != NumElts; ++i) { |
25411 | SDValue CurrentOp = SrcOp->getOperand(i); |
25412 | if (CurrentOp->isUndef()) { |
25413 | |
25414 | Elts.push_back(DAG.getConstant(0, dl, ElementType)); |
25415 | continue; |
25416 | } |
25417 | auto *ND = cast<ConstantSDNode>(CurrentOp); |
25418 | const APInt &C = ND->getAPIntValue(); |
25419 | Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType)); |
25420 | } |
25421 | break; |
25422 | } |
25423 | |
25424 | return DAG.getBuildVector(VT, dl, Elts); |
25425 | } |
25426 | |
25427 | return DAG.getNode(Opc, dl, VT, SrcOp, |
25428 | DAG.getTargetConstant(ShiftAmt, dl, MVT::i8)); |
25429 | } |
25430 | |
25431 | |
25432 | |
// Handle vector element shifts where the shift amount may or may not be a
// constant. Takes the immediate form of the shift opcode as input and builds
// a 128-bit shift-amount vector whose low 64 bits hold the (zero-extended)
// count, as required by the SSE/AVX uniform shift instructions.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
                                   SDValue SrcOp, SDValue ShAmt,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  MVT SVT = ShAmt.getSimpleValueType();
  assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");

  // Constant shift amounts go through the dedicated constant-folding path.
  if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
    return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
                                      CShAmt->getZExtValue(), DAG);

  // Switch to the non-immediate (uniform variable) version of the opcode.
  Opc = getTargetVShiftUniformOpcode(Opc, true);

  // Build a vector containing the shift amount. Only the low 64 bits of the
  // count are consumed, but they must be zero-extended, so each case below
  // arranges for the upper lanes/bits to be zero (or provably ignorable).
  if (SVT == MVT::i64)
    // An i64 amount fills the low element of a v2i64 directly.
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
  else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
           ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
            ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
    // The amount is zext(extract_elt(v, i)) of an i8/i16 lane: re-vectorize
    // the narrow element and zero-extend it in-register.
    ShAmt = ShAmt.getOperand(0);
    MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
    if (Subtarget.hasSSE41())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                          MVT::v2i64, ShAmt);
    else {
      // Without SSE4.1: zero-extend by byte-shifting the element to the top
      // of the 128-bit register and back down (VSRLDQ shifts in zeros).
      SDValue ByteShift = DAG.getTargetConstant(
          (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
      ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
      ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
      ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
    }
  } else if (Subtarget.hasSSE41() &&
             ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    // Extracted i32 lane with SSE4.1: re-vectorize and zero-extend in-reg.
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
    ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                        MVT::v2i64, ShAmt);
  } else {
    // Generic i32 amount: build <ShAmt, 0, undef, undef> so the low 64 bits
    // are the zero-extended count.
    SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
                        DAG.getUNDEF(SVT)};
    ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
  }

  // The shift-amount operand must be a 128-bit vector with the same element
  // type as the input vector.
  MVT EltVT = VT.getVectorElementType();
  MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());

  ShAmt = DAG.getBitcast(ShVT, ShAmt);
  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
25500 | |
25501 | |
25502 | |
// Return an i1-vector mask of type MaskVT derived from the scalar integer
// Mask: constant all-ones/zero masks fold to splat constants, otherwise the
// integer is bitcast to a bit-vector and the low MaskVT elements extracted.
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
                           const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl) {
  // Fold trivial constant masks directly to splat 1s / 0s of the mask type.
  if (isAllOnesConstant(Mask))
    return DAG.getConstant(1, dl, MaskVT);
  if (X86::isZeroNode(Mask))
    return DAG.getConstant(0, dl, MaskVT);

  assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");

  if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
    assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!");
    assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
    // In 32-bit mode an i64 bitcast is illegal: split the mask into its two
    // i32 halves, bitcast each to v32i1, and concatenate.
    SDValue Lo, Hi;
    Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
                     DAG.getConstant(0, dl, MVT::i32));
    Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
                     DAG.getConstant(1, dl, MVT::i32));

    Lo = DAG.getBitcast(MVT::v32i1, Lo);
    Hi = DAG.getBitcast(MVT::v32i1, Hi);

    return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
  } else {
    // Bitcast the whole integer to a vector of i1 bits, then (when MaskVT is
    // narrower, e.g. v2i1/v4i1) take the low elements via EXTRACT_SUBVECTOR.
    MVT BitcastVT = MVT::getVectorVT(MVT::i1,
                                     Mask.getSimpleValueType().getSizeInBits());

    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
                       DAG.getBitcast(BitcastVT, Mask),
                       DAG.getIntPtrConstant(0, dl));
  }
}
25538 | |
25539 | |
25540 | |
25541 | |
25542 | static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, |
25543 | SDValue PreservedSrc, |
25544 | const X86Subtarget &Subtarget, |
25545 | SelectionDAG &DAG) { |
25546 | MVT VT = Op.getSimpleValueType(); |
25547 | MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); |
25548 | unsigned OpcodeSelect = ISD::VSELECT; |
25549 | SDLoc dl(Op); |
25550 | |
25551 | if (isAllOnesConstant(Mask)) |
25552 | return Op; |
25553 | |
25554 | SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
25555 | |
25556 | if (PreservedSrc.isUndef()) |
25557 | PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); |
25558 | return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); |
25559 | } |
25560 | |
25561 | |
25562 | |
25563 | |
25564 | |
25565 | |
25566 | |
25567 | |
25568 | static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, |
25569 | SDValue PreservedSrc, |
25570 | const X86Subtarget &Subtarget, |
25571 | SelectionDAG &DAG) { |
25572 | |
25573 | if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask)) |
25574 | if (MaskConst->getZExtValue() & 0x1) |
25575 | return Op; |
25576 | |
25577 | MVT VT = Op.getSimpleValueType(); |
25578 | SDLoc dl(Op); |
25579 | |
25580 | assert(Mask.getValueType() == MVT::i8 && "Unexpect type"); |
25581 | SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1, |
25582 | DAG.getBitcast(MVT::v8i1, Mask), |
25583 | DAG.getIntPtrConstant(0, dl)); |
25584 | if (Op.getOpcode() == X86ISD::FSETCCM || |
25585 | Op.getOpcode() == X86ISD::FSETCCM_SAE || |
25586 | Op.getOpcode() == X86ISD::VFPCLASSS) |
25587 | return DAG.getNode(ISD::AND, dl, VT, Op, IMask); |
25588 | |
25589 | if (PreservedSrc.isUndef()) |
25590 | PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); |
25591 | return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc); |
25592 | } |
25593 | |
25594 | static int getSEHRegistrationNodeSize(const Function *Fn) { |
25595 | if (!Fn->hasPersonalityFn()) |
25596 | report_fatal_error( |
25597 | "querying registration node size for function without personality"); |
25598 | |
25599 | |
25600 | switch (classifyEHPersonality(Fn->getPersonalityFn())) { |
25601 | case EHPersonality::MSVC_X86SEH: return 24; |
25602 | case EHPersonality::MSVC_CXX: return 16; |
25603 | default: break; |
25604 | } |
25605 | report_fatal_error( |
25606 | "can only recover FP for 32-bit MSVC EH personality functions"); |
25607 | } |
25608 | |
25609 | |
25610 | |
25611 | |
25612 | |
25613 | |
25614 | |
25615 | |
25616 | |
/// Recover the parent function's frame pointer from the EBP/RBP value that
/// the MSVC runtime passes into an outlined funclet (EntryEBP). The math on
/// 32-bit targets is:
///   RegNodeBase = EntryEBP - RegNodeSize
///   ParentFP    = RegNodeBase - ParentFrameOffset
/// On x64 the offset is simply added to the incoming value instead.
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
                                   SDValue EntryEBP) {
  MachineFunction &MF = DAG.getMachineFunction();
  SDLoc dl;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());

  // If the parent function has no personality (e.g. the exceptional code was
  // optimized away), there is no registration node; return EntryEBP as-is.
  if (!Fn->hasPersonalityFn())
    return EntryEBP;

  // Get an MCSymbol that will ultimately resolve to the frame offset of the
  // EH registration / parent frame allocation, and materialize it via
  // LOCAL_RECOVER so the value is resolved at link/layout time.
  MCSymbol *OffsetSym =
      MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
          GlobalValue::dropLLVMManglingEscape(Fn->getName()));
  SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
  SDValue ParentFrameOffset =
      DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);

  // On x64 the parent FP is EntryEBP plus the recovered offset.
  const X86Subtarget &Subtarget =
      static_cast<const X86Subtarget &>(DAG.getSubtarget());
  if (Subtarget.is64Bit())
    return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);

  // 32-bit: step below the personality-specific registration node, then back
  // by the parent frame offset (see the formula in the header comment).
  int RegNodeSize = getSEHRegistrationNodeSize(Fn);
  SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
                                    DAG.getConstant(RegNodeSize, dl, PtrVT));
  return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
25654 | |
25655 | SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
25656 | SelectionDAG &DAG) const { |
25657 | |
25658 | auto isRoundModeCurDirection = [](SDValue Rnd) { |
25659 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) |
25660 | return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; |
25661 | |
25662 | return false; |
25663 | }; |
25664 | auto isRoundModeSAE = [](SDValue Rnd) { |
25665 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) { |
25666 | unsigned RC = C->getZExtValue(); |
25667 | if (RC & X86::STATIC_ROUNDING::NO_EXC) { |
25668 | |
25669 | RC ^= X86::STATIC_ROUNDING::NO_EXC; |
25670 | |
25671 | |
25672 | return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION; |
25673 | } |
25674 | } |
25675 | |
25676 | return false; |
25677 | }; |
25678 | auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) { |
25679 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) { |
25680 | RC = C->getZExtValue(); |
25681 | if (RC & X86::STATIC_ROUNDING::NO_EXC) { |
25682 | |
25683 | RC ^= X86::STATIC_ROUNDING::NO_EXC; |
25684 | return RC == X86::STATIC_ROUNDING::TO_NEAREST_INT || |
25685 | RC == X86::STATIC_ROUNDING::TO_NEG_INF || |
25686 | RC == X86::STATIC_ROUNDING::TO_POS_INF || |
25687 | RC == X86::STATIC_ROUNDING::TO_ZERO; |
25688 | } |
25689 | } |
25690 | |
25691 | return false; |
25692 | }; |
25693 | |
25694 | SDLoc dl(Op); |
25695 | unsigned IntNo = Op.getConstantOperandVal(0); |
25696 | MVT VT = Op.getSimpleValueType(); |
25697 | const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); |
25698 | |
25699 | |
25700 | SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags()); |
25701 | |
25702 | if (IntrData) { |
25703 | switch(IntrData->Type) { |
25704 | case INTR_TYPE_1OP: { |
25705 | |
25706 | |
25707 | |
25708 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25709 | if (IntrWithRoundingModeOpcode != 0) { |
25710 | SDValue Rnd = Op.getOperand(2); |
25711 | unsigned RC = 0; |
25712 | if (isRoundModeSAEToX(Rnd, RC)) |
25713 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25714 | Op.getOperand(1), |
25715 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25716 | if (!isRoundModeCurDirection(Rnd)) |
25717 | return SDValue(); |
25718 | } |
25719 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25720 | Op.getOperand(1)); |
25721 | } |
25722 | case INTR_TYPE_1OP_SAE: { |
25723 | SDValue Sae = Op.getOperand(2); |
25724 | |
25725 | unsigned Opc; |
25726 | if (isRoundModeCurDirection(Sae)) |
25727 | Opc = IntrData->Opc0; |
25728 | else if (isRoundModeSAE(Sae)) |
25729 | Opc = IntrData->Opc1; |
25730 | else |
25731 | return SDValue(); |
25732 | |
25733 | return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1)); |
25734 | } |
25735 | case INTR_TYPE_2OP: { |
25736 | SDValue Src2 = Op.getOperand(2); |
25737 | |
25738 | |
25739 | |
25740 | |
25741 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25742 | if (IntrWithRoundingModeOpcode != 0) { |
25743 | SDValue Rnd = Op.getOperand(3); |
25744 | unsigned RC = 0; |
25745 | if (isRoundModeSAEToX(Rnd, RC)) |
25746 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25747 | Op.getOperand(1), Src2, |
25748 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25749 | if (!isRoundModeCurDirection(Rnd)) |
25750 | return SDValue(); |
25751 | } |
25752 | |
25753 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25754 | Op.getOperand(1), Src2); |
25755 | } |
25756 | case INTR_TYPE_2OP_SAE: { |
25757 | SDValue Sae = Op.getOperand(3); |
25758 | |
25759 | unsigned Opc; |
25760 | if (isRoundModeCurDirection(Sae)) |
25761 | Opc = IntrData->Opc0; |
25762 | else if (isRoundModeSAE(Sae)) |
25763 | Opc = IntrData->Opc1; |
25764 | else |
25765 | return SDValue(); |
25766 | |
25767 | return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1), |
25768 | Op.getOperand(2)); |
25769 | } |
25770 | case INTR_TYPE_3OP: |
25771 | case INTR_TYPE_3OP_IMM8: { |
25772 | SDValue Src1 = Op.getOperand(1); |
25773 | SDValue Src2 = Op.getOperand(2); |
25774 | SDValue Src3 = Op.getOperand(3); |
25775 | |
25776 | if (IntrData->Type == INTR_TYPE_3OP_IMM8 && |
25777 | Src3.getValueType() != MVT::i8) { |
25778 | Src3 = DAG.getTargetConstant( |
25779 | cast<ConstantSDNode>(Src3)->getZExtValue() & 0xff, dl, MVT::i8); |
25780 | } |
25781 | |
25782 | |
25783 | |
25784 | |
25785 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25786 | if (IntrWithRoundingModeOpcode != 0) { |
25787 | SDValue Rnd = Op.getOperand(4); |
25788 | unsigned RC = 0; |
25789 | if (isRoundModeSAEToX(Rnd, RC)) |
25790 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25791 | Src1, Src2, Src3, |
25792 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25793 | if (!isRoundModeCurDirection(Rnd)) |
25794 | return SDValue(); |
25795 | } |
25796 | |
25797 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25798 | {Src1, Src2, Src3}); |
25799 | } |
25800 | case INTR_TYPE_4OP_IMM8: { |
25801 | assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant); |
25802 | SDValue Src4 = Op.getOperand(4); |
25803 | if (Src4.getValueType() != MVT::i8) { |
25804 | Src4 = DAG.getTargetConstant( |
25805 | cast<ConstantSDNode>(Src4)->getZExtValue() & 0xff, dl, MVT::i8); |
25806 | } |
25807 | |
25808 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25809 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), |
25810 | Src4); |
25811 | } |
25812 | case INTR_TYPE_1OP_MASK: { |
25813 | SDValue Src = Op.getOperand(1); |
25814 | SDValue PassThru = Op.getOperand(2); |
25815 | SDValue Mask = Op.getOperand(3); |
25816 | |
25817 | |
25818 | |
25819 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25820 | if (IntrWithRoundingModeOpcode != 0) { |
25821 | SDValue Rnd = Op.getOperand(4); |
25822 | unsigned RC = 0; |
25823 | if (isRoundModeSAEToX(Rnd, RC)) |
25824 | return getVectorMaskingNode( |
25825 | DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25826 | Src, DAG.getTargetConstant(RC, dl, MVT::i32)), |
25827 | Mask, PassThru, Subtarget, DAG); |
25828 | if (!isRoundModeCurDirection(Rnd)) |
25829 | return SDValue(); |
25830 | } |
25831 | return getVectorMaskingNode( |
25832 | DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, |
25833 | Subtarget, DAG); |
25834 | } |
25835 | case INTR_TYPE_1OP_MASK_SAE: { |
25836 | SDValue Src = Op.getOperand(1); |
25837 | SDValue PassThru = Op.getOperand(2); |
25838 | SDValue Mask = Op.getOperand(3); |
25839 | SDValue Rnd = Op.getOperand(4); |
25840 | |
25841 | unsigned Opc; |
25842 | if (isRoundModeCurDirection(Rnd)) |
25843 | Opc = IntrData->Opc0; |
25844 | else if (isRoundModeSAE(Rnd)) |
25845 | Opc = IntrData->Opc1; |
25846 | else |
25847 | return SDValue(); |
25848 | |
25849 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru, |
25850 | Subtarget, DAG); |
25851 | } |
25852 | case INTR_TYPE_SCALAR_MASK: { |
25853 | SDValue Src1 = Op.getOperand(1); |
25854 | SDValue Src2 = Op.getOperand(2); |
25855 | SDValue passThru = Op.getOperand(3); |
25856 | SDValue Mask = Op.getOperand(4); |
25857 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25858 | |
25859 | |
25860 | |
25861 | bool HasRounding = IntrWithRoundingModeOpcode != 0; |
25862 | if (Op.getNumOperands() == (5U + HasRounding)) { |
25863 | if (HasRounding) { |
25864 | SDValue Rnd = Op.getOperand(5); |
25865 | unsigned RC = 0; |
25866 | if (isRoundModeSAEToX(Rnd, RC)) |
25867 | return getScalarMaskingNode( |
25868 | DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2, |
25869 | DAG.getTargetConstant(RC, dl, MVT::i32)), |
25870 | Mask, passThru, Subtarget, DAG); |
25871 | if (!isRoundModeCurDirection(Rnd)) |
25872 | return SDValue(); |
25873 | } |
25874 | return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, |
25875 | Src2), |
25876 | Mask, passThru, Subtarget, DAG); |
25877 | } |
25878 | |
25879 | assert(Op.getNumOperands() == (6U + HasRounding) && |
25880 | "Unexpected intrinsic form"); |
25881 | SDValue RoundingMode = Op.getOperand(5); |
25882 | unsigned Opc = IntrData->Opc0; |
25883 | if (HasRounding) { |
25884 | SDValue Sae = Op.getOperand(6); |
25885 | if (isRoundModeSAE(Sae)) |
25886 | Opc = IntrWithRoundingModeOpcode; |
25887 | else if (!isRoundModeCurDirection(Sae)) |
25888 | return SDValue(); |
25889 | } |
25890 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, |
25891 | Src2, RoundingMode), |
25892 | Mask, passThru, Subtarget, DAG); |
25893 | } |
25894 | case INTR_TYPE_SCALAR_MASK_RND: { |
25895 | SDValue Src1 = Op.getOperand(1); |
25896 | SDValue Src2 = Op.getOperand(2); |
25897 | SDValue passThru = Op.getOperand(3); |
25898 | SDValue Mask = Op.getOperand(4); |
25899 | SDValue Rnd = Op.getOperand(5); |
25900 | |
25901 | SDValue NewOp; |
25902 | unsigned RC = 0; |
25903 | if (isRoundModeCurDirection(Rnd)) |
25904 | NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); |
25905 | else if (isRoundModeSAEToX(Rnd, RC)) |
25906 | NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, |
25907 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25908 | else |
25909 | return SDValue(); |
25910 | |
25911 | return getScalarMaskingNode(NewOp, Mask, passThru, Subtarget, DAG); |
25912 | } |
25913 | case INTR_TYPE_SCALAR_MASK_SAE: { |
25914 | SDValue Src1 = Op.getOperand(1); |
25915 | SDValue Src2 = Op.getOperand(2); |
25916 | SDValue passThru = Op.getOperand(3); |
25917 | SDValue Mask = Op.getOperand(4); |
25918 | SDValue Sae = Op.getOperand(5); |
25919 | unsigned Opc; |
25920 | if (isRoundModeCurDirection(Sae)) |
25921 | Opc = IntrData->Opc0; |
25922 | else if (isRoundModeSAE(Sae)) |
25923 | Opc = IntrData->Opc1; |
25924 | else |
25925 | return SDValue(); |
25926 | |
25927 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), |
25928 | Mask, passThru, Subtarget, DAG); |
25929 | } |
25930 | case INTR_TYPE_2OP_MASK: { |
25931 | SDValue Src1 = Op.getOperand(1); |
25932 | SDValue Src2 = Op.getOperand(2); |
25933 | SDValue PassThru = Op.getOperand(3); |
25934 | SDValue Mask = Op.getOperand(4); |
25935 | SDValue NewOp; |
25936 | if (IntrData->Opc1 != 0) { |
25937 | SDValue Rnd = Op.getOperand(5); |
25938 | unsigned RC = 0; |
25939 | if (isRoundModeSAEToX(Rnd, RC)) |
25940 | NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, |
25941 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25942 | else if (!isRoundModeCurDirection(Rnd)) |
25943 | return SDValue(); |
25944 | } |
25945 | if (!NewOp) |
25946 | NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); |
25947 | return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG); |
25948 | } |
25949 | case INTR_TYPE_2OP_MASK_SAE: { |
25950 | SDValue Src1 = Op.getOperand(1); |
25951 | SDValue Src2 = Op.getOperand(2); |
25952 | SDValue PassThru = Op.getOperand(3); |
25953 | SDValue Mask = Op.getOperand(4); |
25954 | |
25955 | unsigned Opc = IntrData->Opc0; |
25956 | if (IntrData->Opc1 != 0) { |
25957 | SDValue Sae = Op.getOperand(5); |
25958 | if (isRoundModeSAE(Sae)) |
25959 | Opc = IntrData->Opc1; |
25960 | else if (!isRoundModeCurDirection(Sae)) |
25961 | return SDValue(); |
25962 | } |
25963 | |
25964 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), |
25965 | Mask, PassThru, Subtarget, DAG); |
25966 | } |
25967 | case INTR_TYPE_3OP_SCALAR_MASK_SAE: { |
25968 | SDValue Src1 = Op.getOperand(1); |
25969 | SDValue Src2 = Op.getOperand(2); |
25970 | SDValue Src3 = Op.getOperand(3); |
25971 | SDValue PassThru = Op.getOperand(4); |
25972 | SDValue Mask = Op.getOperand(5); |
25973 | SDValue Sae = Op.getOperand(6); |
25974 | unsigned Opc; |
25975 | if (isRoundModeCurDirection(Sae)) |
25976 | Opc = IntrData->Opc0; |
25977 | else if (isRoundModeSAE(Sae)) |
25978 | Opc = IntrData->Opc1; |
25979 | else |
25980 | return SDValue(); |
25981 | |
25982 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), |
25983 | Mask, PassThru, Subtarget, DAG); |
25984 | } |
25985 | case INTR_TYPE_3OP_MASK_SAE: { |
25986 | SDValue Src1 = Op.getOperand(1); |
25987 | SDValue Src2 = Op.getOperand(2); |
25988 | SDValue Src3 = Op.getOperand(3); |
25989 | SDValue PassThru = Op.getOperand(4); |
25990 | SDValue Mask = Op.getOperand(5); |
25991 | |
25992 | unsigned Opc = IntrData->Opc0; |
25993 | if (IntrData->Opc1 != 0) { |
25994 | SDValue Sae = Op.getOperand(6); |
25995 | if (isRoundModeSAE(Sae)) |
25996 | Opc = IntrData->Opc1; |
25997 | else if (!isRoundModeCurDirection(Sae)) |
25998 | return SDValue(); |
25999 | } |
26000 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), |
26001 | Mask, PassThru, Subtarget, DAG); |
26002 | } |
26003 | case BLENDV: { |
26004 | SDValue Src1 = Op.getOperand(1); |
26005 | SDValue Src2 = Op.getOperand(2); |
26006 | SDValue Src3 = Op.getOperand(3); |
26007 | |
26008 | EVT MaskVT = Src3.getValueType().changeVectorElementTypeToInteger(); |
26009 | Src3 = DAG.getBitcast(MaskVT, Src3); |
26010 | |
26011 | |
26012 | return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1); |
26013 | } |
26014 | case VPERM_2OP : { |
26015 | SDValue Src1 = Op.getOperand(1); |
26016 | SDValue Src2 = Op.getOperand(2); |
26017 | |
26018 | |
26019 | return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1); |
26020 | } |
26021 | case FMA_OP_MASKZ: |
26022 | case FMA_OP_MASK: { |
26023 | SDValue Src1 = Op.getOperand(1); |
26024 | SDValue Src2 = Op.getOperand(2); |
26025 | SDValue Src3 = Op.getOperand(3); |
26026 | SDValue Mask = Op.getOperand(4); |
26027 | MVT VT = Op.getSimpleValueType(); |
26028 | |
26029 | SDValue PassThru = Src1; |
26030 | if (IntrData->Type == FMA_OP_MASKZ) |
26031 | PassThru = getZeroVector(VT, Subtarget, DAG, dl); |
26032 | |
26033 | |
26034 | |
26035 | |
26036 | SDValue NewOp; |
26037 | if (IntrData->Opc1 != 0) { |
26038 | SDValue Rnd = Op.getOperand(5); |
26039 | unsigned RC = 0; |
26040 | if (isRoundModeSAEToX(Rnd, RC)) |
26041 | NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Src3, |
26042 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
26043 | else if (!isRoundModeCurDirection(Rnd)) |
26044 | return SDValue(); |
26045 | } |
26046 | if (!NewOp) |
26047 | NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3); |
26048 | return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG); |
26049 | } |
26050 | case IFMA_OP: |
26051 | |
26052 | |
26053 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
26054 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
26055 | case FPCLASSS: { |
26056 | SDValue Src1 = Op.getOperand(1); |
26057 | SDValue Imm = Op.getOperand(2); |
26058 | SDValue Mask = Op.getOperand(3); |
26059 | SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); |
26060 | SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(), |
26061 | Subtarget, DAG); |
26062 | |
26063 | |
26064 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, |
26065 | DAG.getConstant(0, dl, MVT::v8i1), |
26066 | FPclassMask, DAG.getIntPtrConstant(0, dl)); |
26067 | return DAG.getBitcast(MVT::i8, Ins); |
26068 | } |
26069 | |
26070 | case CMP_MASK_CC: { |
26071 | MVT MaskVT = Op.getSimpleValueType(); |
26072 | SDValue CC = Op.getOperand(3); |
26073 | SDValue Mask = Op.getOperand(4); |
26074 | |
26075 | |
26076 | |
26077 | if (IntrData->Opc1 != 0) { |
26078 | SDValue Sae = Op.getOperand(5); |
26079 | if (isRoundModeSAE(Sae)) |
26080 | return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), |
26081 | Op.getOperand(2), CC, Mask, Sae); |
26082 | if (!isRoundModeCurDirection(Sae)) |
26083 | return SDValue(); |
26084 | } |
26085 | |
26086 | return DAG.getNode(IntrData->Opc0, dl, MaskVT, |
26087 | {Op.getOperand(1), Op.getOperand(2), CC, Mask}); |
26088 | } |
26089 | case CMP_MASK_SCALAR_CC: { |
26090 | SDValue Src1 = Op.getOperand(1); |
26091 | SDValue Src2 = Op.getOperand(2); |
26092 | SDValue CC = Op.getOperand(3); |
26093 | SDValue Mask = Op.getOperand(4); |
26094 | |
26095 | SDValue Cmp; |
26096 | if (IntrData->Opc1 != 0) { |
26097 | SDValue Sae = Op.getOperand(5); |
26098 | if (isRoundModeSAE(Sae)) |
26099 | Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae); |
26100 | else if (!isRoundModeCurDirection(Sae)) |
26101 | return SDValue(); |
26102 | } |
26103 | |
26104 | if (!Cmp.getNode()) |
26105 | Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); |
26106 | |
26107 | SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(), |
26108 | Subtarget, DAG); |
26109 | |
26110 | |
26111 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, |
26112 | DAG.getConstant(0, dl, MVT::v8i1), |
26113 | CmpMask, DAG.getIntPtrConstant(0, dl)); |
26114 | return DAG.getBitcast(MVT::i8, Ins); |
26115 | } |
26116 | case COMI: { |
26117 | ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; |
26118 | SDValue LHS = Op.getOperand(1); |
26119 | SDValue RHS = Op.getOperand(2); |
26120 | |
26121 | if (CC == ISD::SETLT || CC == ISD::SETLE) |
26122 | std::swap(LHS, RHS); |
26123 | |
26124 | SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); |
26125 | SDValue SetCC; |
26126 | switch (CC) { |
26127 | case ISD::SETEQ: { |
26128 | SetCC = getSETCC(X86::COND_E, Comi, dl, DAG); |
26129 | SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG); |
26130 | SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP); |
26131 | break; |
26132 | } |
26133 | case ISD::SETNE: { |
26134 | SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG); |
26135 | SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG); |
26136 | SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP); |
26137 | break; |
26138 | } |
26139 | case ISD::SETGT: |
26140 | case ISD::SETLT: { |
26141 | SetCC = getSETCC(X86::COND_A, Comi, dl, DAG); |
26142 | break; |
26143 | } |
26144 | case ISD::SETGE: |
26145 | case ISD::SETLE: |
26146 | SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG); |
26147 | break; |
26148 | default: |
26149 | llvm_unreachable("Unexpected illegal condition!"); |
26150 | } |
26151 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
26152 | } |
26153 | case COMI_RM: { |
26154 | SDValue LHS = Op.getOperand(1); |
26155 | SDValue RHS = Op.getOperand(2); |
26156 | unsigned CondVal = Op.getConstantOperandVal(3); |
26157 | SDValue Sae = Op.getOperand(4); |
26158 | |
26159 | SDValue FCmp; |
26160 | if (isRoundModeCurDirection(Sae)) |
26161 | FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, |
26162 | DAG.getTargetConstant(CondVal, dl, MVT::i8)); |
26163 | else if (isRoundModeSAE(Sae)) |
26164 | FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS, |
26165 | DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae); |
26166 | else |
26167 | return SDValue(); |
26168 | |
26169 | |
26170 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, |
26171 | DAG.getConstant(0, dl, MVT::v16i1), |
26172 | FCmp, DAG.getIntPtrConstant(0, dl)); |
26173 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, |
26174 | DAG.getBitcast(MVT::i16, Ins)); |
26175 | } |
26176 | case VSHIFT: |
26177 | return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), |
26178 | Op.getOperand(1), Op.getOperand(2), Subtarget, |
26179 | DAG); |
26180 | case COMPRESS_EXPAND_IN_REG: { |
26181 | SDValue Mask = Op.getOperand(3); |
26182 | SDValue DataToCompress = Op.getOperand(1); |
26183 | SDValue PassThru = Op.getOperand(2); |
26184 | if (ISD::isBuildVectorAllOnes(Mask.getNode())) |
26185 | return Op.getOperand(1); |
26186 | |
26187 | |
26188 | if (PassThru.isUndef()) |
26189 | PassThru = DAG.getConstant(0, dl, VT); |
26190 | |
26191 | return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru, |
26192 | Mask); |
26193 | } |
26194 | case FIXUPIMM: |
26195 | case FIXUPIMM_MASKZ: { |
26196 | SDValue Src1 = Op.getOperand(1); |
26197 | SDValue Src2 = Op.getOperand(2); |
26198 | SDValue Src3 = Op.getOperand(3); |
26199 | SDValue Imm = Op.getOperand(4); |
26200 | SDValue Mask = Op.getOperand(5); |
26201 | SDValue Passthru = (IntrData->Type == FIXUPIMM) |
26202 | ? Src1 |
26203 | : getZeroVector(VT, Subtarget, DAG, dl); |
26204 | |
26205 | unsigned Opc = IntrData->Opc0; |
26206 | if (IntrData->Opc1 != 0) { |
26207 | SDValue Sae = Op.getOperand(6); |
26208 | if (isRoundModeSAE(Sae)) |
26209 | Opc = IntrData->Opc1; |
26210 | else if (!isRoundModeCurDirection(Sae)) |
26211 | return SDValue(); |
26212 | } |
26213 | |
26214 | SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm); |
26215 | |
26216 | if (Opc == X86ISD::VFIXUPIMM || Opc == X86ISD::VFIXUPIMM_SAE) |
26217 | return getVectorMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG); |
26218 | |
26219 | return getScalarMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG); |
26220 | } |
26221 | case ROUNDP: { |
26222 | assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); |
26223 | |
26224 | |
26225 | auto Round = cast<ConstantSDNode>(Op.getOperand(2)); |
26226 | SDValue RoundingMode = |
26227 | DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); |
26228 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
26229 | Op.getOperand(1), RoundingMode); |
26230 | } |
26231 | case ROUNDS: { |
26232 | assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); |
26233 | |
26234 | |
26235 | auto Round = cast<ConstantSDNode>(Op.getOperand(3)); |
26236 | SDValue RoundingMode = |
26237 | DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); |
26238 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
26239 | Op.getOperand(1), Op.getOperand(2), RoundingMode); |
26240 | } |
26241 | case BEXTRI: { |
26242 | assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode"); |
26243 | |
26244 | uint64_t Imm = Op.getConstantOperandVal(2); |
26245 | SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl, |
26246 | Op.getValueType()); |
26247 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
26248 | Op.getOperand(1), Control); |
26249 | } |
26250 | |
26251 | case ADX: { |
26252 | SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); |
26253 | SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32); |
26254 | |
26255 | SDValue Res; |
26256 | |
26257 | |
26258 | if (isNullConstant(Op.getOperand(1))) { |
26259 | Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2), |
26260 | Op.getOperand(3)); |
26261 | } else { |
26262 | SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1), |
26263 | DAG.getConstant(-1, dl, MVT::i8)); |
26264 | Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2), |
26265 | Op.getOperand(3), GenCF.getValue(1)); |
26266 | } |
26267 | SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG); |
26268 | SDValue Results[] = { SetCC, Res }; |
26269 | return DAG.getMergeValues(Results, dl); |
26270 | } |
26271 | case CVTPD2PS_MASK: |
26272 | case CVTPD2DQ_MASK: |
26273 | case CVTQQ2PS_MASK: |
26274 | case TRUNCATE_TO_REG: { |
26275 | SDValue Src = Op.getOperand(1); |
26276 | SDValue PassThru = Op.getOperand(2); |
26277 | SDValue Mask = Op.getOperand(3); |
26278 | |
26279 | if (isAllOnesConstant(Mask)) |
26280 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); |
26281 | |
26282 | MVT SrcVT = Src.getSimpleValueType(); |
26283 | MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); |
26284 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26285 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), |
26286 | {Src, PassThru, Mask}); |
26287 | } |
26288 | case CVTPS2PH_MASK: { |
26289 | SDValue Src = Op.getOperand(1); |
26290 | SDValue Rnd = Op.getOperand(2); |
26291 | SDValue PassThru = Op.getOperand(3); |
26292 | SDValue Mask = Op.getOperand(4); |
26293 | |
26294 | if (isAllOnesConstant(Mask)) |
26295 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd); |
26296 | |
26297 | MVT SrcVT = Src.getSimpleValueType(); |
26298 | MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); |
26299 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26300 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd, |
26301 | PassThru, Mask); |
26302 | |
26303 | } |
26304 | case CVTNEPS2BF16_MASK: { |
26305 | SDValue Src = Op.getOperand(1); |
26306 | SDValue PassThru = Op.getOperand(2); |
26307 | SDValue Mask = Op.getOperand(3); |
26308 | |
26309 | if (ISD::isBuildVectorAllOnes(Mask.getNode())) |
26310 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); |
26311 | |
26312 | |
26313 | if (PassThru.isUndef()) |
26314 | PassThru = DAG.getConstant(0, dl, PassThru.getValueType()); |
26315 | |
26316 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru, |
26317 | Mask); |
26318 | } |
26319 | default: |
26320 | break; |
26321 | } |
26322 | } |
26323 | |
26324 | switch (IntNo) { |
26325 | default: return SDValue(); |
26326 | |
26327 | |
26328 | |
26329 | |
26330 | case Intrinsic::x86_avx512_ktestc_b: |
26331 | case Intrinsic::x86_avx512_ktestc_w: |
26332 | case Intrinsic::x86_avx512_ktestc_d: |
26333 | case Intrinsic::x86_avx512_ktestc_q: |
26334 | case Intrinsic::x86_avx512_ktestz_b: |
26335 | case Intrinsic::x86_avx512_ktestz_w: |
26336 | case Intrinsic::x86_avx512_ktestz_d: |
26337 | case Intrinsic::x86_avx512_ktestz_q: |
26338 | case Intrinsic::x86_sse41_ptestz: |
26339 | case Intrinsic::x86_sse41_ptestc: |
26340 | case Intrinsic::x86_sse41_ptestnzc: |
26341 | case Intrinsic::x86_avx_ptestz_256: |
26342 | case Intrinsic::x86_avx_ptestc_256: |
26343 | case Intrinsic::x86_avx_ptestnzc_256: |
26344 | case Intrinsic::x86_avx_vtestz_ps: |
26345 | case Intrinsic::x86_avx_vtestc_ps: |
26346 | case Intrinsic::x86_avx_vtestnzc_ps: |
26347 | case Intrinsic::x86_avx_vtestz_pd: |
26348 | case Intrinsic::x86_avx_vtestc_pd: |
26349 | case Intrinsic::x86_avx_vtestnzc_pd: |
26350 | case Intrinsic::x86_avx_vtestz_ps_256: |
26351 | case Intrinsic::x86_avx_vtestc_ps_256: |
26352 | case Intrinsic::x86_avx_vtestnzc_ps_256: |
26353 | case Intrinsic::x86_avx_vtestz_pd_256: |
26354 | case Intrinsic::x86_avx_vtestc_pd_256: |
26355 | case Intrinsic::x86_avx_vtestnzc_pd_256: { |
26356 | unsigned TestOpc = X86ISD::PTEST; |
26357 | X86::CondCode X86CC; |
26358 | switch (IntNo) { |
26359 | default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); |
26360 | case Intrinsic::x86_avx512_ktestc_b: |
26361 | case Intrinsic::x86_avx512_ktestc_w: |
26362 | case Intrinsic::x86_avx512_ktestc_d: |
26363 | case Intrinsic::x86_avx512_ktestc_q: |
26364 | |
26365 | TestOpc = X86ISD::KTEST; |
26366 | X86CC = X86::COND_B; |
26367 | break; |
26368 | case Intrinsic::x86_avx512_ktestz_b: |
26369 | case Intrinsic::x86_avx512_ktestz_w: |
26370 | case Intrinsic::x86_avx512_ktestz_d: |
26371 | case Intrinsic::x86_avx512_ktestz_q: |
26372 | TestOpc = X86ISD::KTEST; |
26373 | X86CC = X86::COND_E; |
26374 | break; |
26375 | case Intrinsic::x86_avx_vtestz_ps: |
26376 | case Intrinsic::x86_avx_vtestz_pd: |
26377 | case Intrinsic::x86_avx_vtestz_ps_256: |
26378 | case Intrinsic::x86_avx_vtestz_pd_256: |
26379 | TestOpc = X86ISD::TESTP; |
26380 | LLVM_FALLTHROUGH; |
26381 | case Intrinsic::x86_sse41_ptestz: |
26382 | case Intrinsic::x86_avx_ptestz_256: |
26383 | |
26384 | X86CC = X86::COND_E; |
26385 | break; |
26386 | case Intrinsic::x86_avx_vtestc_ps: |
26387 | case Intrinsic::x86_avx_vtestc_pd: |
26388 | case Intrinsic::x86_avx_vtestc_ps_256: |
26389 | case Intrinsic::x86_avx_vtestc_pd_256: |
26390 | TestOpc = X86ISD::TESTP; |
26391 | LLVM_FALLTHROUGH; |
26392 | case Intrinsic::x86_sse41_ptestc: |
26393 | case Intrinsic::x86_avx_ptestc_256: |
26394 | |
26395 | X86CC = X86::COND_B; |
26396 | break; |
26397 | case Intrinsic::x86_avx_vtestnzc_ps: |
26398 | case Intrinsic::x86_avx_vtestnzc_pd: |
26399 | case Intrinsic::x86_avx_vtestnzc_ps_256: |
26400 | case Intrinsic::x86_avx_vtestnzc_pd_256: |
26401 | TestOpc = X86ISD::TESTP; |
26402 | LLVM_FALLTHROUGH; |
26403 | case Intrinsic::x86_sse41_ptestnzc: |
26404 | case Intrinsic::x86_avx_ptestnzc_256: |
26405 | |
26406 | X86CC = X86::COND_A; |
26407 | break; |
26408 | } |
26409 | |
26410 | SDValue LHS = Op.getOperand(1); |
26411 | SDValue RHS = Op.getOperand(2); |
26412 | SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); |
26413 | SDValue SetCC = getSETCC(X86CC, Test, dl, DAG); |
26414 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
26415 | } |
26416 | |
26417 | case Intrinsic::x86_sse42_pcmpistria128: |
26418 | case Intrinsic::x86_sse42_pcmpestria128: |
26419 | case Intrinsic::x86_sse42_pcmpistric128: |
26420 | case Intrinsic::x86_sse42_pcmpestric128: |
26421 | case Intrinsic::x86_sse42_pcmpistrio128: |
26422 | case Intrinsic::x86_sse42_pcmpestrio128: |
26423 | case Intrinsic::x86_sse42_pcmpistris128: |
26424 | case Intrinsic::x86_sse42_pcmpestris128: |
26425 | case Intrinsic::x86_sse42_pcmpistriz128: |
26426 | case Intrinsic::x86_sse42_pcmpestriz128: { |
26427 | unsigned Opcode; |
26428 | X86::CondCode X86CC; |
26429 | switch (IntNo) { |
26430 | default: llvm_unreachable("Impossible intrinsic"); |
26431 | case Intrinsic::x86_sse42_pcmpistria128: |
26432 | Opcode = X86ISD::PCMPISTR; |
26433 | X86CC = X86::COND_A; |
26434 | break; |
26435 | case Intrinsic::x86_sse42_pcmpestria128: |
26436 | Opcode = X86ISD::PCMPESTR; |
26437 | X86CC = X86::COND_A; |
26438 | break; |
26439 | case Intrinsic::x86_sse42_pcmpistric128: |
26440 | Opcode = X86ISD::PCMPISTR; |
26441 | X86CC = X86::COND_B; |
26442 | break; |
26443 | case Intrinsic::x86_sse42_pcmpestric128: |
26444 | Opcode = X86ISD::PCMPESTR; |
26445 | X86CC = X86::COND_B; |
26446 | break; |
26447 | case Intrinsic::x86_sse42_pcmpistrio128: |
26448 | Opcode = X86ISD::PCMPISTR; |
26449 | X86CC = X86::COND_O; |
26450 | break; |
26451 | case Intrinsic::x86_sse42_pcmpestrio128: |
26452 | Opcode = X86ISD::PCMPESTR; |
26453 | X86CC = X86::COND_O; |
26454 | break; |
26455 | case Intrinsic::x86_sse42_pcmpistris128: |
26456 | Opcode = X86ISD::PCMPISTR; |
26457 | X86CC = X86::COND_S; |
26458 | break; |
26459 | case Intrinsic::x86_sse42_pcmpestris128: |
26460 | Opcode = X86ISD::PCMPESTR; |
26461 | X86CC = X86::COND_S; |
26462 | break; |
26463 | case Intrinsic::x86_sse42_pcmpistriz128: |
26464 | Opcode = X86ISD::PCMPISTR; |
26465 | X86CC = X86::COND_E; |
26466 | break; |
26467 | case Intrinsic::x86_sse42_pcmpestriz128: |
26468 | Opcode = X86ISD::PCMPESTR; |
26469 | X86CC = X86::COND_E; |
26470 | break; |
26471 | } |
26472 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
26473 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
26474 | SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2); |
26475 | SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG); |
26476 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
26477 | } |
26478 | |
26479 | case Intrinsic::x86_sse42_pcmpistri128: |
26480 | case Intrinsic::x86_sse42_pcmpestri128: { |
26481 | unsigned Opcode; |
26482 | if (IntNo == Intrinsic::x86_sse42_pcmpistri128) |
26483 | Opcode = X86ISD::PCMPISTR; |
26484 | else |
26485 | Opcode = X86ISD::PCMPESTR; |
26486 | |
26487 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
26488 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
26489 | return DAG.getNode(Opcode, dl, VTs, NewOps); |
26490 | } |
26491 | |
26492 | case Intrinsic::x86_sse42_pcmpistrm128: |
26493 | case Intrinsic::x86_sse42_pcmpestrm128: { |
26494 | unsigned Opcode; |
26495 | if (IntNo == Intrinsic::x86_sse42_pcmpistrm128) |
26496 | Opcode = X86ISD::PCMPISTR; |
26497 | else |
26498 | Opcode = X86ISD::PCMPESTR; |
26499 | |
26500 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
26501 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
26502 | return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1); |
26503 | } |
26504 | |
26505 | case Intrinsic::eh_sjlj_lsda: { |
26506 | MachineFunction &MF = DAG.getMachineFunction(); |
26507 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26508 | MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); |
26509 | auto &Context = MF.getMMI().getContext(); |
26510 | MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + |
26511 | Twine(MF.getFunctionNumber())); |
26512 | return DAG.getNode(getGlobalWrapperKind(), dl, VT, |
26513 | DAG.getMCSymbol(S, PtrVT)); |
26514 | } |
26515 | |
26516 | case Intrinsic::x86_seh_lsda: { |
26517 | |
26518 | MachineFunction &MF = DAG.getMachineFunction(); |
26519 | SDValue Op1 = Op.getOperand(1); |
26520 | auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); |
26521 | MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( |
26522 | GlobalValue::dropLLVMManglingEscape(Fn->getName())); |
26523 | |
26524 | |
26525 | |
26526 | SDValue Result = DAG.getMCSymbol(LSDASym, VT); |
26527 | return DAG.getNode(X86ISD::Wrapper, dl, VT, Result); |
26528 | } |
26529 | |
26530 | case Intrinsic::eh_recoverfp: { |
26531 | SDValue FnOp = Op.getOperand(1); |
26532 | SDValue IncomingFPOp = Op.getOperand(2); |
26533 | GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); |
26534 | auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); |
26535 | if (!Fn) |
26536 | report_fatal_error( |
26537 | "llvm.eh.recoverfp must take a function as the first argument"); |
26538 | return recoverFramePointer(DAG, Fn, IncomingFPOp); |
26539 | } |
26540 | |
26541 | case Intrinsic::localaddress: { |
26542 | |
26543 | |
26544 | MachineFunction &MF = DAG.getMachineFunction(); |
26545 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
26546 | unsigned Reg; |
26547 | if (RegInfo->hasBasePointer(MF)) |
26548 | Reg = RegInfo->getBaseRegister(); |
26549 | else { |
26550 | bool CantUseFP = RegInfo->hasStackRealignment(MF); |
26551 | if (CantUseFP) |
26552 | Reg = RegInfo->getPtrSizedStackRegister(MF); |
26553 | else |
26554 | Reg = RegInfo->getPtrSizedFrameRegister(MF); |
26555 | } |
26556 | return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); |
26557 | } |
26558 | case Intrinsic::swift_async_context_addr: { |
26559 | auto &MF = DAG.getMachineFunction(); |
26560 | auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
26561 | if (Subtarget.is64Bit()) { |
26562 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
26563 | X86FI->setHasSwiftAsyncContext(true); |
26564 | return SDValue( |
26565 | DAG.getMachineNode( |
26566 | X86::SUB64ri8, dl, MVT::i64, |
26567 | DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64), |
26568 | DAG.getTargetConstant(8, dl, MVT::i32)), |
26569 | 0); |
26570 | } else { |
26571 | |
26572 | |
26573 | if (!X86FI->getSwiftAsyncContextFrameIdx()) |
26574 | X86FI->setSwiftAsyncContextFrameIdx( |
26575 | MF.getFrameInfo().CreateStackObject(4, Align(4), false)); |
26576 | return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32); |
26577 | } |
26578 | } |
26579 | case Intrinsic::x86_avx512_vp2intersect_q_512: |
26580 | case Intrinsic::x86_avx512_vp2intersect_q_256: |
26581 | case Intrinsic::x86_avx512_vp2intersect_q_128: |
26582 | case Intrinsic::x86_avx512_vp2intersect_d_512: |
26583 | case Intrinsic::x86_avx512_vp2intersect_d_256: |
26584 | case Intrinsic::x86_avx512_vp2intersect_d_128: { |
26585 | MVT MaskVT = Op.getSimpleValueType(); |
26586 | |
26587 | SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other); |
26588 | SDLoc DL(Op); |
26589 | |
26590 | SDValue Operation = |
26591 | DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, |
26592 | Op->getOperand(1), Op->getOperand(2)); |
26593 | |
26594 | SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, |
26595 | MaskVT, Operation); |
26596 | SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, |
26597 | MaskVT, Operation); |
26598 | return DAG.getMergeValues({Result0, Result1}, DL); |
26599 | } |
26600 | case Intrinsic::x86_mmx_pslli_w: |
26601 | case Intrinsic::x86_mmx_pslli_d: |
26602 | case Intrinsic::x86_mmx_pslli_q: |
26603 | case Intrinsic::x86_mmx_psrli_w: |
26604 | case Intrinsic::x86_mmx_psrli_d: |
26605 | case Intrinsic::x86_mmx_psrli_q: |
26606 | case Intrinsic::x86_mmx_psrai_w: |
26607 | case Intrinsic::x86_mmx_psrai_d: { |
26608 | SDLoc DL(Op); |
26609 | SDValue ShAmt = Op.getOperand(2); |
26610 | |
26611 | if (auto *C = dyn_cast<ConstantSDNode>(ShAmt)) { |
26612 | |
26613 | |
26614 | unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255); |
26615 | if (ShiftAmount == 0) |
26616 | return Op.getOperand(1); |
26617 | |
26618 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), |
26619 | Op.getOperand(0), Op.getOperand(1), |
26620 | DAG.getTargetConstant(ShiftAmount, DL, MVT::i32)); |
26621 | } |
26622 | |
26623 | unsigned NewIntrinsic; |
26624 | switch (IntNo) { |
26625 | default: llvm_unreachable("Impossible intrinsic"); |
26626 | case Intrinsic::x86_mmx_pslli_w: |
26627 | NewIntrinsic = Intrinsic::x86_mmx_psll_w; |
26628 | break; |
26629 | case Intrinsic::x86_mmx_pslli_d: |
26630 | NewIntrinsic = Intrinsic::x86_mmx_psll_d; |
26631 | break; |
26632 | case Intrinsic::x86_mmx_pslli_q: |
26633 | NewIntrinsic = Intrinsic::x86_mmx_psll_q; |
26634 | break; |
26635 | case Intrinsic::x86_mmx_psrli_w: |
26636 | NewIntrinsic = Intrinsic::x86_mmx_psrl_w; |
26637 | break; |
26638 | case Intrinsic::x86_mmx_psrli_d: |
26639 | NewIntrinsic = Intrinsic::x86_mmx_psrl_d; |
26640 | break; |
26641 | case Intrinsic::x86_mmx_psrli_q: |
26642 | NewIntrinsic = Intrinsic::x86_mmx_psrl_q; |
26643 | break; |
26644 | case Intrinsic::x86_mmx_psrai_w: |
26645 | NewIntrinsic = Intrinsic::x86_mmx_psra_w; |
26646 | break; |
26647 | case Intrinsic::x86_mmx_psrai_d: |
26648 | NewIntrinsic = Intrinsic::x86_mmx_psra_d; |
26649 | break; |
26650 | } |
26651 | |
26652 | |
26653 | |
26654 | |
26655 | ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt); |
26656 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), |
26657 | DAG.getTargetConstant(NewIntrinsic, DL, |
26658 | getPointerTy(DAG.getDataLayout())), |
26659 | Op.getOperand(1), ShAmt); |
26660 | } |
26661 | } |
26662 | } |
26663 | |
26664 | static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
26665 | SDValue Src, SDValue Mask, SDValue Base, |
26666 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
26667 | const X86Subtarget &Subtarget) { |
26668 | SDLoc dl(Op); |
26669 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26670 | |
26671 | if (!C) |
26672 | return SDValue(); |
26673 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26674 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26675 | TLI.getPointerTy(DAG.getDataLayout())); |
26676 | EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger(); |
26677 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other); |
26678 | |
26679 | |
26680 | |
26681 | if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) |
26682 | Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); |
26683 | |
26684 | |
26685 | Mask = DAG.getBitcast(MaskVT, Mask); |
26686 | |
26687 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26688 | |
26689 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; |
26690 | SDValue Res = |
26691 | DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops, |
26692 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
26693 | return DAG.getMergeValues({Res, Res.getValue(1)}, dl); |
26694 | } |
26695 | |
26696 | static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, |
26697 | SDValue Src, SDValue Mask, SDValue Base, |
26698 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
26699 | const X86Subtarget &Subtarget) { |
26700 | MVT VT = Op.getSimpleValueType(); |
26701 | SDLoc dl(Op); |
26702 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26703 | |
26704 | if (!C) |
26705 | return SDValue(); |
26706 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26707 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26708 | TLI.getPointerTy(DAG.getDataLayout())); |
26709 | unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), |
26710 | VT.getVectorNumElements()); |
26711 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); |
26712 | |
26713 | |
26714 | |
26715 | if (Mask.getValueType() != MaskVT) |
26716 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26717 | |
26718 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other); |
26719 | |
26720 | |
26721 | |
26722 | if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) |
26723 | Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); |
26724 | |
26725 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26726 | |
26727 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; |
26728 | SDValue Res = |
26729 | DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops, |
26730 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
26731 | return DAG.getMergeValues({Res, Res.getValue(1)}, dl); |
26732 | } |
26733 | |
26734 | static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
26735 | SDValue Src, SDValue Mask, SDValue Base, |
26736 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
26737 | const X86Subtarget &Subtarget) { |
26738 | SDLoc dl(Op); |
26739 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26740 | |
26741 | if (!C) |
26742 | return SDValue(); |
26743 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26744 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26745 | TLI.getPointerTy(DAG.getDataLayout())); |
26746 | unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), |
26747 | Src.getSimpleValueType().getVectorNumElements()); |
26748 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); |
26749 | |
26750 | |
26751 | |
26752 | if (Mask.getValueType() != MaskVT) |
26753 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26754 | |
26755 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26756 | |
26757 | SDVTList VTs = DAG.getVTList(MVT::Other); |
26758 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale}; |
26759 | SDValue Res = |
26760 | DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops, |
26761 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
26762 | return Res; |
26763 | } |
26764 | |
26765 | static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
26766 | SDValue Mask, SDValue Base, SDValue Index, |
26767 | SDValue ScaleOp, SDValue Chain, |
26768 | const X86Subtarget &Subtarget) { |
26769 | SDLoc dl(Op); |
26770 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26771 | |
26772 | if (!C) |
26773 | return SDValue(); |
26774 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26775 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26776 | TLI.getPointerTy(DAG.getDataLayout())); |
26777 | SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); |
26778 | SDValue Segment = DAG.getRegister(0, MVT::i32); |
26779 | MVT MaskVT = |
26780 | MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); |
26781 | SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26782 | SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain}; |
26783 | SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); |
26784 | return SDValue(Res, 0); |
26785 | } |
26786 | |
26787 | |
26788 | |
26789 | |
26790 | |
26791 | |
26792 | |
26793 | |
26794 | |
26795 | static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL, |
26796 | SelectionDAG &DAG, |
26797 | unsigned TargetOpcode, |
26798 | unsigned SrcReg, |
26799 | const X86Subtarget &Subtarget, |
26800 | SmallVectorImpl<SDValue> &Results) { |
26801 | SDValue Chain = N->getOperand(0); |
26802 | SDValue Glue; |
26803 | |
26804 | if (SrcReg) { |
26805 | assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); |
26806 | Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue); |
26807 | Glue = Chain.getValue(1); |
26808 | } |
26809 | |
26810 | SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); |
26811 | SDValue N1Ops[] = {Chain, Glue}; |
26812 | SDNode *N1 = DAG.getMachineNode( |
26813 | TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1)); |
26814 | Chain = SDValue(N1, 0); |
26815 | |
26816 | |
26817 | SDValue LO, HI; |
26818 | if (Subtarget.is64Bit()) { |
26819 | LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1)); |
26820 | HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, |
26821 | LO.getValue(2)); |
26822 | } else { |
26823 | LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1)); |
26824 | HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, |
26825 | LO.getValue(2)); |
26826 | } |
26827 | Chain = HI.getValue(1); |
26828 | Glue = HI.getValue(2); |
26829 | |
26830 | if (Subtarget.is64Bit()) { |
26831 | |
26832 | SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, |
26833 | DAG.getConstant(32, DL, MVT::i8)); |
26834 | Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); |
26835 | Results.push_back(Chain); |
26836 | return Glue; |
26837 | } |
26838 | |
26839 | |
26840 | SDValue Ops[] = { LO, HI }; |
26841 | SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); |
26842 | Results.push_back(Pair); |
26843 | Results.push_back(Chain); |
26844 | return Glue; |
26845 | } |
26846 | |
26847 | |
26848 | |
26849 | |
26850 | static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode, |
26851 | SelectionDAG &DAG, |
26852 | const X86Subtarget &Subtarget, |
26853 | SmallVectorImpl<SDValue> &Results) { |
26854 | |
26855 | |
26856 | |
26857 | SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode, |
26858 | 0, Subtarget, |
26859 | Results); |
26860 | if (Opcode != X86::RDTSCP) |
26861 | return; |
26862 | |
26863 | SDValue Chain = Results[1]; |
26864 | |
26865 | |
26866 | SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue); |
26867 | Results[1] = ecx; |
26868 | Results.push_back(ecx.getValue(1)); |
26869 | } |
26870 | |
26871 | static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget, |
26872 | SelectionDAG &DAG) { |
26873 | SmallVector<SDValue, 3> Results; |
26874 | SDLoc DL(Op); |
26875 | getReadTimeStampCounter(Op.getNode(), DL, X86::RDTSC, DAG, Subtarget, |
26876 | Results); |
26877 | return DAG.getMergeValues(Results, DL); |
26878 | } |
26879 | |
26880 | static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { |
26881 | MachineFunction &MF = DAG.getMachineFunction(); |
26882 | SDValue Chain = Op.getOperand(0); |
26883 | SDValue RegNode = Op.getOperand(2); |
26884 | WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); |
26885 | if (!EHInfo) |
26886 | report_fatal_error("EH registrations only live in functions using WinEH"); |
26887 | |
26888 | |
26889 | auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode); |
26890 | if (!FINode) |
26891 | report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); |
26892 | EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); |
26893 | |
26894 | |
26895 | return Chain; |
26896 | } |
26897 | |
26898 | static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) { |
26899 | MachineFunction &MF = DAG.getMachineFunction(); |
26900 | SDValue Chain = Op.getOperand(0); |
26901 | SDValue EHGuard = Op.getOperand(2); |
26902 | WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); |
26903 | if (!EHInfo) |
26904 | report_fatal_error("EHGuard only live in functions using WinEH"); |
26905 | |
26906 | |
26907 | auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard); |
26908 | if (!FINode) |
26909 | report_fatal_error("llvm.x86.seh.ehguard expects a static alloca"); |
26910 | EHInfo->EHGuardFrameIndex = FINode->getIndex(); |
26911 | |
26912 | |
26913 | return Chain; |
26914 | } |
26915 | |
26916 | |
26917 | static SDValue |
26918 | EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, |
26919 | SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, |
26920 | SelectionDAG &DAG) { |
26921 | SDVTList VTs = DAG.getVTList(MVT::Other); |
26922 | SDValue Undef = DAG.getUNDEF(Ptr.getValueType()); |
26923 | SDValue Ops[] = { Chain, Val, Ptr, Undef }; |
26924 | unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS; |
26925 | return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO); |
26926 | } |
26927 | |
26928 | |
26929 | static SDValue |
26930 | EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, |
26931 | SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, |
26932 | MachineMemOperand *MMO, SelectionDAG &DAG) { |
26933 | SDVTList VTs = DAG.getVTList(MVT::Other); |
26934 | SDValue Ops[] = { Chain, Val, Ptr, Mask }; |
26935 | unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS; |
26936 | return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO); |
26937 | } |
26938 | |
/// Lower ISD::INTRINSIC_W_CHAIN nodes (target intrinsics carrying a chain).
/// Operand 0 is the chain, operand 1 the intrinsic ID, the rest are the
/// intrinsic's arguments. Intrinsics with a table entry are dispatched on
/// IntrData->Type below; a handful of special cases are handled up front.
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  unsigned IntNo = Op.getConstantOperandVal(1);
  const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
  if (!IntrData) {
    switch (IntNo) {
    case llvm::Intrinsic::x86_seh_ehregnode:
      return MarkEHRegistrationNode(Op, DAG);
    case llvm::Intrinsic::x86_seh_ehguard:
      return MarkEHGuard(Op, DAG);
    case llvm::Intrinsic::x86_rdpkru: {
      SDLoc dl(Op);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      // The extra zero operand models RDPKRU's required ECX = 0 input.
      return DAG.getNode(X86ISD::RDPKRU, dl, VTs, Op.getOperand(0),
                         DAG.getConstant(0, dl, MVT::i32));
    }
    case llvm::Intrinsic::x86_wrpkru: {
      SDLoc dl(Op);
      // Operand 2 is the new PKRU value; the two zero constants model
      // WRPKRU's required ECX = EDX = 0 inputs.
      return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other,
                         Op.getOperand(0), Op.getOperand(2),
                         DAG.getConstant(0, dl, MVT::i32),
                         DAG.getConstant(0, dl, MVT::i32));
    }
    case llvm::Intrinsic::x86_flags_read_u32:
    case llvm::Intrinsic::x86_flags_read_u64:
    case llvm::Intrinsic::x86_flags_write_u32:
    case llvm::Intrinsic::x86_flags_write_u64: {
      // Record that this function copies EFLAGS in a way that implies a
      // stack adjustment, so frame lowering can account for it. The node
      // itself is returned unchanged and expanded later.
      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
      MFI.setHasCopyImplyingStackAdjustment(true);
      // Don't do anything here; these intrinsics are expanded out later.
      return Op;
    }
    case Intrinsic::x86_lwpins32:
    case Intrinsic::x86_lwpins64:
    case Intrinsic::x86_umwait:
    case Intrinsic::x86_tpause: {
      SDLoc dl(Op);
      SDValue Chain = Op->getOperand(0);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      unsigned Opcode;

      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic");
      case Intrinsic::x86_umwait:
        Opcode = X86ISD::UMWAIT;
        break;
      case Intrinsic::x86_tpause:
        Opcode = X86ISD::TPAUSE;
        break;
      case Intrinsic::x86_lwpins32:
      case Intrinsic::x86_lwpins64:
        Opcode = X86ISD::LWPINS;
        break;
      }

      // These instructions report their status via the carry flag;
      // materialize it with SETB and merge with the output chain.
      SDValue Operation =
          DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
                      Op->getOperand(3), Op->getOperand(4));
      SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                         Operation.getValue(1));
    }
    case Intrinsic::x86_enqcmd:
    case Intrinsic::x86_enqcmds: {
      SDLoc dl(Op);
      SDValue Chain = Op.getOperand(0);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      unsigned Opcode;
      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic!");
      case Intrinsic::x86_enqcmd:
        Opcode = X86ISD::ENQCMD;
        break;
      case Intrinsic::x86_enqcmds:
        Opcode = X86ISD::ENQCMDS;
        break;
      }
      // ENQCMD/ENQCMDS report success through ZF; extract it with SETE.
      SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2),
                                      Op.getOperand(3));
      SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG);
      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                         Operation.getValue(1));
    }
    case Intrinsic::x86_aesenc128kl:
    case Intrinsic::x86_aesdec128kl:
    case Intrinsic::x86_aesenc256kl:
    case Intrinsic::x86_aesdec256kl: {
      // Key Locker single-block operations: memory intrinsics producing
      // (data:v2i64, EFLAGS:i32, chain). The IR intrinsic's result order is
      // (success flag, data, chain), hence the reshuffle at the end.
      SDLoc DL(Op);
      SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::i32, MVT::Other);
      SDValue Chain = Op.getOperand(0);
      unsigned Opcode;

      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic");
      case Intrinsic::x86_aesenc128kl:
        Opcode = X86ISD::AESENC128KL;
        break;
      case Intrinsic::x86_aesdec128kl:
        Opcode = X86ISD::AESDEC128KL;
        break;
      case Intrinsic::x86_aesenc256kl:
        Opcode = X86ISD::AESENC256KL;
        break;
      case Intrinsic::x86_aesdec256kl:
        Opcode = X86ISD::AESDEC256KL;
        break;
      }

      MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
      MachineMemOperand *MMO = MemIntr->getMemOperand();
      EVT MemVT = MemIntr->getMemoryVT();
      SDValue Operation = DAG.getMemIntrinsicNode(
          Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT,
          MMO);
      // ZF set means the handle was legal and the operation succeeded.
      SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG);

      return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
                         {ZF, Operation.getValue(0), Operation.getValue(2)});
    }
    case Intrinsic::x86_aesencwide128kl:
    case Intrinsic::x86_aesdecwide128kl:
    case Intrinsic::x86_aesencwide256kl:
    case Intrinsic::x86_aesdecwide256kl: {
      // Key Locker wide (8-block) operations: the node produces
      // (EFLAGS:i32, 8 x v2i64 data, chain); the IR intrinsic's result
      // order is (success flag, 8 data values, chain).
      SDLoc DL(Op);
      SDVTList VTs = DAG.getVTList(
          {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
           MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
      SDValue Chain = Op.getOperand(0);
      unsigned Opcode;

      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic");
      case Intrinsic::x86_aesencwide128kl:
        Opcode = X86ISD::AESENCWIDE128KL;
        break;
      case Intrinsic::x86_aesdecwide128kl:
        Opcode = X86ISD::AESDECWIDE128KL;
        break;
      case Intrinsic::x86_aesencwide256kl:
        Opcode = X86ISD::AESENCWIDE256KL;
        break;
      case Intrinsic::x86_aesdecwide256kl:
        Opcode = X86ISD::AESDECWIDE256KL;
        break;
      }

      MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
      MachineMemOperand *MMO = MemIntr->getMemOperand();
      EVT MemVT = MemIntr->getMemoryVT();
      SDValue Operation = DAG.getMemIntrinsicNode(
          Opcode, DL, VTs,
          {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
           Op.getOperand(5), Op.getOperand(6), Op.getOperand(7),
           Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)},
          MemVT, MMO);
      SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG);

      return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
                         {ZF, Operation.getValue(1), Operation.getValue(2),
                          Operation.getValue(3), Operation.getValue(4),
                          Operation.getValue(5), Operation.getValue(6),
                          Operation.getValue(7), Operation.getValue(8),
                          Operation.getValue(9)});
    }
    case Intrinsic::x86_testui: {
      // TESTUI reports the UIF value through the carry flag.
      SDLoc dl(Op);
      SDValue Chain = Op.getOperand(0);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain);
      SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                         Operation.getValue(1));
    }
    }
    // Unknown chained intrinsic with no table entry: not handled here.
    return SDValue();
  }

  SDLoc dl(Op);
  switch(IntrData->Type) {
  default: llvm_unreachable("Unknown Intrinsic Type");
  case RDSEED:
  case RDRAND: {
    // Emit the node; value 1 is the EFLAGS result glued to the CMOV below.
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other);
    SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));

    // Success is signalled via CF. CMOV on COND_B picks 1 when CF is set,
    // otherwise the (zero/trunc-extended) random value itself — which the
    // ISA documents as zeroed on failure (TODO confirm), yielding 0.
    SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
                     DAG.getConstant(1, dl, Op->getValueType(1)),
                     DAG.getTargetConstant(X86::COND_B, dl, MVT::i8),
                     SDValue(Result.getNode(), 1)};
    SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops);

    // Return (random value, validity flag, chain).
    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
                       SDValue(Result.getNode(), 2));
  }
  case GATHER_AVX2: {
    SDValue Chain = Op.getOperand(0);
    SDValue Src = Op.getOperand(2);
    SDValue Base = Op.getOperand(3);
    SDValue Index = Op.getOperand(4);
    SDValue Mask = Op.getOperand(5);
    SDValue Scale = Op.getOperand(6);
    return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
                             Scale, Chain, Subtarget);
  }
  case GATHER: {
    // AVX-512 gather: same operand layout as GATHER_AVX2.
    SDValue Chain = Op.getOperand(0);
    SDValue Src = Op.getOperand(2);
    SDValue Base = Op.getOperand(3);
    SDValue Index = Op.getOperand(4);
    SDValue Mask = Op.getOperand(5);
    SDValue Scale = Op.getOperand(6);
    return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
                         Chain, Subtarget);
  }
  case SCATTER: {
    // Note: scatter's operand order differs from gather's (src is last).
    SDValue Chain = Op.getOperand(0);
    SDValue Base = Op.getOperand(2);
    SDValue Mask = Op.getOperand(3);
    SDValue Index = Op.getOperand(4);
    SDValue Src = Op.getOperand(5);
    SDValue Scale = Op.getOperand(6);
    return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
                          Scale, Chain, Subtarget);
  }
  case PREFETCH: {
    // Hint 3 selects Opc0, hint 2 selects Opc1 (only those two are valid).
    const APInt &HintVal = Op.getConstantOperandAPInt(6);
    assert((HintVal == 2 || HintVal == 3) &&
           "Wrong prefetch hint in intrinsic: should be 2 or 3");
    unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
    SDValue Chain = Op.getOperand(0);
    SDValue Mask = Op.getOperand(2);
    SDValue Index = Op.getOperand(3);
    SDValue Base = Op.getOperand(4);
    SDValue Scale = Op.getOperand(5);
    return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
                           Subtarget);
  }
  // Read Time Stamp Counter (RDTSC / RDTSCP).
  case RDTSC: {
    SmallVector<SDValue, 2> Results;
    getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget,
                            Results);
    return DAG.getMergeValues(Results, dl);
  }
  // Read Performance Monitoring Counter / extended control register: both
  // take the counter/register index in ECX and return EDX:EAX.
  case RDPMC:
  case XGETBV: {
    SmallVector<SDValue, 2> Results;
    expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
                                Subtarget, Results);
    return DAG.getMergeValues(Results, dl);
  }
  // XTEST: nonzero result means we are inside a transaction.
  case XTEST: {
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
    SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));

    SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
    SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
                       Ret, SDValue(InTrans.getNode(), 1));
  }
  case TRUNCATE_TO_MEM_VI8:
  case TRUNCATE_TO_MEM_VI16:
  case TRUNCATE_TO_MEM_VI32: {
    SDValue Mask = Op.getOperand(4);
    SDValue DataToTruncate = Op.getOperand(3);
    SDValue Addr = Op.getOperand(2);
    SDValue Chain = Op.getOperand(0);

    MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
    assert(MemIntr && "Expected MemIntrinsicSDNode!");

    EVT MemVT = MemIntr->getMemoryVT();

    uint16_t TruncationOp = IntrData->Opc0;
    switch (TruncationOp) {
    case X86ISD::VTRUNC: {
      // Plain truncating store; an all-ones mask means it is unmasked.
      if (isAllOnesConstant(Mask))
        return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
                                 MemIntr->getMemOperand());

      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
      SDValue Offset = DAG.getUNDEF(VMask.getValueType());

      return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask,
                                MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED,
                                /*IsTruncating=*/true );
    }
    case X86ISD::VTRUNCUS:
    case X86ISD::VTRUNCS: {
      // Saturating truncating store, signed or unsigned.
      bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
      if (isAllOnesConstant(Mask))
        return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
                               MemIntr->getMemOperand(), DAG);

      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);

      return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
                                   VMask, MemVT, MemIntr->getMemOperand(), DAG);
    }
    default:
      llvm_unreachable("Unsupported truncstore intrinsic");
    }
  }
  }
}
27265 | |
27266 | SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, |
27267 | SelectionDAG &DAG) const { |
27268 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
27269 | MFI.setReturnAddressIsTaken(true); |
27270 | |
27271 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
27272 | return SDValue(); |
27273 | |
27274 | unsigned Depth = Op.getConstantOperandVal(0); |
27275 | SDLoc dl(Op); |
27276 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
27277 | |
27278 | if (Depth > 0) { |
27279 | SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); |
27280 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
27281 | SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); |
27282 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), |
27283 | DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), |
27284 | MachinePointerInfo()); |
27285 | } |
27286 | |
27287 | |
27288 | SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); |
27289 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, |
27290 | MachinePointerInfo()); |
27291 | } |
27292 | |
27293 | SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op, |
27294 | SelectionDAG &DAG) const { |
27295 | DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true); |
27296 | return getReturnAddressFrameIndex(DAG); |
27297 | } |
27298 | |
/// Lower ISD::FRAMEADDR. On targets using Windows CFI a fixed stack object
/// at SP offset 0 stands in for the frame address; otherwise the frame
/// pointer register is read and the frame-pointer chain is walked Depth
/// times.
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  EVT VT = Op.getValueType();

  MFI.setFrameAddressIsTaken(true);

  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    // With Windows unwind codes, use (and lazily create, cached in
    // FuncInfo) a fixed object at offset 0 from the incoming SP and return
    // its frame index. NOTE(review): Depth is ignored on this path.
    int FrameAddrIndex = FuncInfo->getFAIndex();
    if (!FrameAddrIndex) {
      // Set up a frame object for the return address.
      unsigned SlotSize = RegInfo->getSlotSize();
      FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
          SlotSize, /*SPOffset=*/0, /*IsImmutable=*/false);
      FuncInfo->setFAIndex(FrameAddrIndex);
    }
    return DAG.getFrameIndex(FrameAddrIndex, VT);
  }

  unsigned FrameReg =
      RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
  SDLoc dl(Op);  // FIXME probably not meaningful
  unsigned Depth = Op.getConstantOperandVal(0);
  // The frame register's width must match the requested value type.
  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
          (FrameReg == X86::EBP && VT == MVT::i32)) &&
         "Invalid Frame Register!");
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  // Each saved frame pointer points at the previous one: chase the chain
  // Depth times to reach the requested ancestor frame.
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());
  return FrameAddr;
}
27336 | |
27337 | |
27338 | |
/// Map a register name (from llvm.read_register / llvm.write_register) to
/// the corresponding physical register. Only the stack and frame pointer
/// names are recognized; anything else is a fatal error, as is naming the
/// frame pointer in a function that has no frame pointer.
Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT,
                                              const MachineFunction &MF) const {
  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();

  Register Reg = StringSwitch<unsigned>(RegName)
                     .Case("esp", X86::ESP)
                     .Case("rsp", X86::RSP)
                     .Case("ebp", X86::EBP)
                     .Case("rbp", X86::RBP)
                     .Default(0);

  if (Reg == X86::EBP || Reg == X86::RBP) {
    // The frame pointer is only pinned (non-allocatable) when the function
    // actually uses one.
    if (!TFI.hasFP(MF))
      report_fatal_error("register " + StringRef(RegName) +
                         " is allocatable: function has no frame pointer");
#ifndef NDEBUG
    else {
      // Sanity-check that the subtarget's frame register really is a
      // frame-pointer register.
      const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
      Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF);
      assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
             "Invalid Frame Register!");
    }
#endif
  }

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}
27369 | |
27370 | SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, |
27371 | SelectionDAG &DAG) const { |
27372 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
27373 | return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); |
27374 | } |
27375 | |
27376 | Register X86TargetLowering::getExceptionPointerRegister( |
27377 | const Constant *PersonalityFn) const { |
27378 | if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) |
27379 | return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; |
27380 | |
27381 | return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX; |
27382 | } |
27383 | |
27384 | Register X86TargetLowering::getExceptionSelectorRegister( |
27385 | const Constant *PersonalityFn) const { |
27386 | |
27387 | if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) |
27388 | return X86::NoRegister; |
27389 | return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; |
27390 | } |
27391 | |
// Catch objects are given fixed frame slots only on Win64 (presumably so the
// Windows EH runtime can locate them — TODO confirm against WinEH docs).
bool X86TargetLowering::needsFixedCatchObjects() const {
  return Subtarget.isTargetWin64();
}
27395 | |
/// Lower ISD::EH_RETURN: store the handler address into the return-address
/// slot of the calling frame (frame pointer + one slot + Offset), publish
/// that address in ECX/RCX, and emit the X86ISD::EH_RETURN node.
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain     = Op.getOperand(0);
  SDValue Offset    = Op.getOperand(1);  // extra bytes to pop off the frame
  SDValue Handler   = Op.getOperand(2);  // address to resume execution at
  SDLoc dl (Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
  // The frame register's width must match the pointer type.
  assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
          (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
         "Invalid Frame Register!");
  SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
  Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;

  // Return-address slot: one slot above the saved frame pointer, plus the
  // caller-supplied Offset.
  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
                                  DAG.getIntPtrConstant(RegInfo->getSlotSize(),
                                                        dl));
  StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
  Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);

  return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
                     DAG.getRegister(StoreAddrReg, PtrVT));
}
27421 | |
/// Lower ISD::EH_SJLJ_SETJMP to the X86 SjLj setjmp node.
SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // On 32-bit targets, force creation of the global base register now; the
  // result is discarded, so getGlobalBaseReg is called purely for its side
  // effect (presumably the later SjLj expansion needs the register to
  // exist — TODO confirm).
  if (!Subtarget.is64Bit()) {
    const X86InstrInfo *TII = Subtarget.getInstrInfo();
    (void)TII->getGlobalBaseReg(&DAG.getMachineFunction());
  }
  // Operand 0: chain; operand 1: jump buffer. Produces (i32 result, chain).
  return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}
27439 | |
27440 | SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, |
27441 | SelectionDAG &DAG) const { |
27442 | SDLoc DL(Op); |
27443 | return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, |
27444 | Op.getOperand(0), Op.getOperand(1)); |
27445 | } |
27446 | |
27447 | SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, |
27448 | SelectionDAG &DAG) const { |
27449 | SDLoc DL(Op); |
27450 | return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, |
27451 | Op.getOperand(0)); |
27452 | } |
27453 | |
// On x86 the trampoline address needs no adjustment: return the incoming
// pointer (operand 0) unchanged.
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
  return Op.getOperand(0);
}
27457 | |
/// Lower ISD::INIT_TRAMPOLINE: write a small machine-code stub into the
/// memory at Trmp that loads the static chain (Nest) into the nest register
/// and jumps to FPtr. The byte layout below assumes a little-endian target
/// (multi-byte constant stores interleave opcode/prefix bytes).
SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Root = Op.getOperand(0);   // chain
  SDValue Trmp = Op.getOperand(1);   // trampoline memory
  SDValue FPtr = Op.getOperand(2);   // nested function
  SDValue Nest = Op.getOperand(3);   // 'nest' parameter value
  SDLoc dl (Op);

  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  if (Subtarget.is64Bit()) {
    SDValue OutChains[6];

    // Large code-model style sequence:
    //   movabsq $FPtr, %r11 ; movabsq $Nest, %r10 ; jmpq *%r11
    const unsigned char JMP64r  = 0xFF; // 64-bit jmp through register
    const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode

    const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
    const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;

    const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix, W + B bits

    // Offset 0-1: REX.WB + (movabsq opcode | r11) — "movabsq $..., %r11".
    // The i16 store places REX_WB in the low byte (little-endian).
    unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
    SDValue Addr = Trmp;
    OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
                                Addr, MachinePointerInfo(TrmpAddr));

    // Offset 2-9: the 64-bit immediate = address of the nested function.
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(2, dl, MVT::i64));
    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
                                MachinePointerInfo(TrmpAddr, 2), Align(2));

    // Offset 10-11: "movabsq $..., %r10".
    OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(10, dl, MVT::i64));
    OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
                                Addr, MachinePointerInfo(TrmpAddr, 10));

    // Offset 12-19: the 64-bit immediate = the nest parameter value.
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(12, dl, MVT::i64));
    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
                                MachinePointerInfo(TrmpAddr, 12), Align(2));

    // Offset 20-21: REX.WB + jmp opcode — start of "jmpq *%r11".
    OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(20, dl, MVT::i64));
    OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
                                Addr, MachinePointerInfo(TrmpAddr, 20));

    // Offset 22: ModRM byte — mod=11 (register), reg=/4 (jmp), rm=r11.
    unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(22, dl, MVT::i64));
    OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8),
                                Addr, MachinePointerInfo(TrmpAddr, 22));

    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  } else {
    const Function *Func =
      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
    CallingConv::ID CC = Func->getCallingConv();
    unsigned NestReg;

    // Pick a register for the static chain that the callee's calling
    // convention doesn't use for its declared arguments.
    switch (CC) {
    default:
      llvm_unreachable("Unsupported calling convention");
    case CallingConv::C:
    case CallingConv::X86_StdCall: {
      // Pass 'nest' parameter in ECX.
      NestReg = X86::ECX;

      // ECX may already be used by 'inreg' arguments; if more than two
      // 32-bit pieces are passed in registers the nest register is taken.
      FunctionType *FTy = Func->getFunctionType();
      const AttributeList &Attrs = Func->getAttributes();

      if (!Attrs.isEmpty() && !Func->isVarArg()) {
        unsigned InRegCount = 0;
        unsigned Idx = 0;

        for (FunctionType::param_iterator I = FTy->param_begin(),
             E = FTy->param_end(); I != E; ++I, ++Idx)
          if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
            const DataLayout &DL = DAG.getDataLayout();
            // Count the number of 32-bit register pieces this uses.
            InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
          }

        if (InRegCount > 2) {
          report_fatal_error("Nest register in use - reduce number of inreg"
                             " parameters!");
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
    case CallingConv::X86_ThisCall:
    case CallingConv::Fast:
    case CallingConv::Tail:
    case CallingConv::SwiftTail:
      // Pass 'nest' parameter in EAX (not used by these conventions'
      // register arguments).
      NestReg = X86::EAX;
      break;
    }

    SDValue OutChains[4];
    SDValue Addr, Disp;

    // Disp = FPtr - (Trmp + 10): jump displacement relative to the end of
    // the 10-byte stub (mov is 5 bytes, jmp is 5 bytes).
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(10, dl, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);

    // Offset 0: "movl $Nest, %NestReg" opcode byte.
    const unsigned char MOV32ri = 0xB8; // X86::MOV32ri opcode
    const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
    OutChains[0] =
        DAG.getStore(Root, dl, DAG.getConstant(MOV32ri | N86Reg, dl, MVT::i8),
                     Trmp, MachinePointerInfo(TrmpAddr));

    // Offset 1-4: the 32-bit nest immediate.
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(1, dl, MVT::i32));
    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
                                MachinePointerInfo(TrmpAddr, 1), Align(1));

    // Offset 5: "jmp rel32" opcode byte.
    const unsigned char JMP = 0xE9; // jmp <32bit dst>
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(5, dl, MVT::i32));
    OutChains[2] =
        DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), Addr,
                     MachinePointerInfo(TrmpAddr, 5), Align(1));

    // Offset 6-9: the 32-bit relative displacement.
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(6, dl, MVT::i32));
    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
                                MachinePointerInfo(TrmpAddr, 6), Align(1));

    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  }
}
27602 | |
/// Lower ISD::FLT_ROUNDS_: read the x87 control word with FNSTCW and map
/// its rounding-control field (bits 11:10) to the FLT_ROUNDS encoding:
///   x87 RC: 0 = nearest, 1 = down, 2 = up, 3 = toward zero
///   FLT_ROUNDS: 0 = toward zero, 1 = nearest, 2 = up, 3 = down
/// The translation is done with a 2-bit-per-entry lookup table packed into
/// the constant 0x2d (0b00'10'11'01): indexed by 2*RC it yields 1,3,2,0.
SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MVT VT = Op.getSimpleValueType();
  SDLoc DL(Op);

  // Spill the 16-bit control word to a 2-byte stack slot.
  int SSFI = MF.getFrameInfo().CreateStackObject(2, Align(2), false);
  SDValue StackSlot =
      DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);

  SDValue Chain = Op.getOperand(0);
  SDValue Ops[] = {Chain, StackSlot};
  Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
                                  DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI,
                                  Align(2), MachineMemOperand::MOStore);

  // Load FP control word back from the slot.
  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, Align(2));
  Chain = CWD.getValue(1);

  // Shift = 2 * RC: mask out bits 11:10 and shift right by 9 (not 10) so
  // the result directly indexes 2-bit groups in the lookup constant.
  SDValue Shift =
      DAG.getNode(ISD::SRL, DL, MVT::i16,
                  DAG.getNode(ISD::AND, DL, MVT::i16,
                              CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
                  DAG.getConstant(9, DL, MVT::i8));
  Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);

  // RetVal = (LUT >> Shift) & 3 — the FLT_ROUNDS value for this RC.
  SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
  SDValue RetVal =
      DAG.getNode(ISD::AND, DL, MVT::i32,
                  DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
                  DAG.getConstant(3, DL, MVT::i32));

  RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);

  return DAG.getMergeValues({RetVal, Chain}, DL);
}
27666 | |
/// Lower ISD::SET_ROUNDING: update the rounding-control field (bits 11:10)
/// of the x87 control word via FNSTCW/FLDCW and, when SSE is available,
/// the corresponding field (bits 14:13) of MXCSR via STMXCSR/LDMXCSR.
SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SDLoc DL(Op);
  SDValue Chain = Op.getNode()->getOperand(0);

  // 4-byte slot, shared by the 2-byte FPCW spill and the 4-byte MXCSR
  // spill below.
  int OldCWFrameIdx = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
  SDValue StackSlot =
      DAG.getFrameIndex(OldCWFrameIdx, getPointerTy(DAG.getDataLayout()));
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, OldCWFrameIdx);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 2, Align(2));

  // Store the current FP control word.
  SDValue Ops[] = {Chain, StackSlot};
  Chain = DAG.getMemIntrinsicNode(
      X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO);

  // Load it back and clear the rounding-control bits (11:10).
  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI);
  Chain = CWD.getValue(1);
  CWD = DAG.getNode(ISD::AND, DL, MVT::i16, CWD.getValue(0),
                    DAG.getConstant(0xf3ff, DL, MVT::i16));

  // Compute the new RC field from the requested rounding mode (operand 1).
  SDValue NewRM = Op.getNode()->getOperand(1);
  SDValue RMBits;
  if (auto *CVal = dyn_cast<ConstantSDNode>(NewRM)) {
    // Constant mode: translate directly to the x87 RC field value.
    uint64_t RM = CVal->getZExtValue();
    int FieldVal;
    switch (static_cast<RoundingMode>(RM)) {
    case RoundingMode::NearestTiesToEven: FieldVal = X86::rmToNearest; break;
    case RoundingMode::TowardNegative:    FieldVal = X86::rmDownward; break;
    case RoundingMode::TowardPositive:    FieldVal = X86::rmUpward; break;
    case RoundingMode::TowardZero:        FieldVal = X86::rmTowardZero; break;
    default:
      llvm_unreachable("rounding mode is not supported by X86 hardware");
    }
    RMBits = DAG.getConstant(FieldVal, DL, MVT::i16);
  } else {
    // Dynamic mode: shift the packed table constant 0xc9 left by
    // (2*RM + 4) and mask with 0xc00 to land the 2-bit RC field in bits
    // 11:10 (table-driven translation of the four supported modes —
    // presumably mirrors the constant case above; TODO confirm encoding).
    SDValue ShiftValue =
        DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                    DAG.getNode(ISD::ADD, DL, MVT::i32,
                                DAG.getNode(ISD::SHL, DL, MVT::i32, NewRM,
                                            DAG.getConstant(1, DL, MVT::i8)),
                                DAG.getConstant(4, DL, MVT::i32)));
    SDValue Shifted =
        DAG.getNode(ISD::SHL, DL, MVT::i16, DAG.getConstant(0xc9, DL, MVT::i16),
                    ShiftValue);
    RMBits = DAG.getNode(ISD::AND, DL, MVT::i16, Shifted,
                         DAG.getConstant(0xc00, DL, MVT::i16));
  }

  // Merge the new RC bits into the control word and reload it with FLDCW.
  CWD = DAG.getNode(ISD::OR, DL, MVT::i16, CWD, RMBits);
  Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, /*Alignment=*/2);

  SDValue OpsLD[] = {Chain, StackSlot};
  MachineMemOperand *MMOL =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 2, Align(2));
  Chain = DAG.getMemIntrinsicNode(
      X86ISD::FLDCW16m, DL, DAG.getVTList(MVT::Other), OpsLD, MVT::i16, MMOL);

  // With SSE, also update the rounding control in MXCSR, whose RC field
  // sits 3 bits higher than the FPCW's (bits 14:13).
  if (Subtarget.hasSSE1()) {
    // Store MXCSR into the stack slot.
    Chain = DAG.getNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
        DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
        StackSlot);

    // Load it back and clear its rounding-control bits (14:13).
    SDValue CWD = DAG.getLoad(MVT::i32, DL, Chain, StackSlot, MPI);
    Chain = CWD.getValue(1);
    CWD = DAG.getNode(ISD::AND, DL, MVT::i32, CWD.getValue(0),
                      DAG.getConstant(0xffff9fff, DL, MVT::i32));

    // Reuse the FPCW-format RMBits, shifted into MXCSR's RC position.
    RMBits = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, RMBits);
    RMBits = DAG.getNode(ISD::SHL, DL, MVT::i32, RMBits,
                         DAG.getConstant(3, DL, MVT::i8));

    // Merge and reload MXCSR.
    CWD = DAG.getNode(ISD::OR, DL, MVT::i32, CWD, RMBits);
    Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, /*Alignment=*/4);

    Chain = DAG.getNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
        DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
        StackSlot);
  }

  return Chain;
}
27778 | |
27779 | |
27780 | |
27781 | |
27782 | |
27783 | |
27784 | |
27785 | static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG, |
27786 | const X86Subtarget &Subtarget) { |
27787 | assert(Op.getOpcode() == ISD::CTLZ); |
27788 | SDLoc dl(Op); |
27789 | MVT VT = Op.getSimpleValueType(); |
27790 | MVT EltVT = VT.getVectorElementType(); |
27791 | unsigned NumElems = VT.getVectorNumElements(); |
27792 | |
27793 | assert((EltVT == MVT::i8 || EltVT == MVT::i16) && |
27794 | "Unsupported element type"); |
27795 | |
27796 | |
27797 | if (NumElems > 16 || |
27798 | (NumElems == 16 && !Subtarget.canExtendTo512DQ())) |
27799 | return splitVectorIntUnary(Op, DAG); |
27800 | |
27801 | MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); |
27802 | assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && |
27803 | "Unsupported value type for operation"); |
27804 | |
27805 | |
27806 | Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0)); |
27807 | SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op); |
27808 | SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode); |
27809 | SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT); |
27810 | |
27811 | return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta); |
27812 | } |
27813 | |
27814 | |
// Lower vector CTLZ using an in-register nibble lookup table (PSHUFB).
// Each byte's leading-zero count is computed from its two 4-bit halves via a
// 16-entry LUT, then byte counts are repeatedly merged pairwise up to the
// requested element width.
static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  int NumElts = VT.getVectorNumElements();
  int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8);
  // Work on the value reinterpreted as a vector of bytes.
  MVT CurrVT = MVT::getVectorVT(MVT::i8, NumBytes);

  // LUT[n] = number of leading zeros in the 4-bit value n (within a nibble).
  const int LUT[16] = { 4, 3, 2, 2,
                        1, 1, 1, 1,
                        0, 0, 0, 0,
                        0, 0, 0, 0};

  // Materialize the LUT, replicated across every 16-byte lane so PSHUFB can
  // index it per-lane.
  SmallVector<SDValue, 64> LUTVec;
  for (int i = 0; i < NumBytes; ++i)
    LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
  SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);

  SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
  SDValue Zero = DAG.getConstant(0, DL, CurrVT);

  // Split each byte into its low nibble (Lo) and high nibble (Hi). HiZ is an
  // all-ones/all-zeros mask per byte: -1 where the high nibble is zero.
  SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
  SDValue Lo = Op0;
  SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
  SDValue HiZ;
  if (CurrVT.is512BitVector()) {
    // 512-bit compares produce an i1 mask vector; sign-extend it back to a
    // byte mask.
    MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements());
    HiZ = DAG.getSetCC(DL, MaskVT, Hi, Zero, ISD::SETEQ);
    HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ);
  } else {
    HiZ = DAG.getSetCC(DL, CurrVT, Hi, Zero, ISD::SETEQ);
  }

  // Per-byte ctlz: use the LUT on each nibble; the low nibble's count only
  // applies when the high nibble is zero (hence the AND with HiZ), and the
  // high-nibble count (0..4) is always added.
  Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo);
  Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi);
  Lo = DAG.getNode(ISD::AND, DL, CurrVT, Lo, HiZ);
  SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi);

  // Merge adjacent element counts pairwise until we reach the requested
  // element width: the low half's count only participates when the upper
  // half of the wider element is entirely zero.
  while (CurrVT != VT) {
    int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits();
    int CurrNumElts = CurrVT.getVectorNumElements();
    MVT NextSVT = MVT::getIntegerVT(CurrScalarSizeInBits * 2);
    MVT NextVT = MVT::getVectorVT(NextSVT, CurrNumElts / 2);
    SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT);

    // Recompute the "element is zero" mask of the source at the current
    // element width.
    if (CurrVT.is512BitVector()) {
      MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements());
      HiZ = DAG.getSetCC(DL, MaskVT, DAG.getBitcast(CurrVT, Op0),
                         DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
      HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ);
    } else {
      HiZ = DAG.getSetCC(DL, CurrVT, DAG.getBitcast(CurrVT, Op0),
                         DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
    }
    HiZ = DAG.getBitcast(NextVT, HiZ);

    // R0 = count from the upper half; R1 = count from the lower half, kept
    // only where the upper half was zero (masked via the shifted HiZ).
    SDValue ResNext = Res = DAG.getBitcast(NextVT, Res);
    SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift);
    SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift);
    R1 = DAG.getNode(ISD::AND, DL, NextVT, ResNext, R1);
    Res = DAG.getNode(ISD::ADD, DL, NextVT, R0, R1);
    CurrVT = NextVT;
  }

  return Res;
}
27896 | |
27897 | static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, |
27898 | const X86Subtarget &Subtarget, |
27899 | SelectionDAG &DAG) { |
27900 | MVT VT = Op.getSimpleValueType(); |
27901 | |
27902 | if (Subtarget.hasCDI() && |
27903 | |
27904 | (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8)) |
27905 | return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget); |
27906 | |
27907 | |
27908 | if (VT.is256BitVector() && !Subtarget.hasInt256()) |
27909 | return splitVectorIntUnary(Op, DAG); |
27910 | |
27911 | |
27912 | if (VT.is512BitVector() && !Subtarget.hasBWI()) |
27913 | return splitVectorIntUnary(Op, DAG); |
27914 | |
27915 | assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB"); |
27916 | return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG); |
27917 | } |
27918 | |
27919 | static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget, |
27920 | SelectionDAG &DAG) { |
27921 | MVT VT = Op.getSimpleValueType(); |
27922 | MVT OpVT = VT; |
27923 | unsigned NumBits = VT.getSizeInBits(); |
27924 | SDLoc dl(Op); |
27925 | unsigned Opc = Op.getOpcode(); |
27926 | |
27927 | if (VT.isVector()) |
27928 | return LowerVectorCTLZ(Op, dl, Subtarget, DAG); |
27929 | |
27930 | Op = Op.getOperand(0); |
27931 | if (VT == MVT::i8) { |
27932 | |
27933 | OpVT = MVT::i32; |
27934 | Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op); |
27935 | } |
27936 | |
27937 | |
27938 | SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); |
27939 | Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); |
27940 | |
27941 | if (Opc == ISD::CTLZ) { |
27942 | |
27943 | SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), |
27944 | DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), |
27945 | Op.getValue(1)}; |
27946 | Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); |
27947 | } |
27948 | |
27949 | |
27950 | Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, |
27951 | DAG.getConstant(NumBits - 1, dl, OpVT)); |
27952 | |
27953 | if (VT == MVT::i8) |
27954 | Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); |
27955 | return Op; |
27956 | } |
27957 | |
27958 | static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget, |
27959 | SelectionDAG &DAG) { |
27960 | MVT VT = Op.getSimpleValueType(); |
27961 | unsigned NumBits = VT.getScalarSizeInBits(); |
27962 | SDValue N0 = Op.getOperand(0); |
27963 | SDLoc dl(Op); |
27964 | |
27965 | assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ && |
27966 | "Only scalar CTTZ requires custom lowering"); |
27967 | |
27968 | |
27969 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
27970 | Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); |
27971 | |
27972 | |
27973 | SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), |
27974 | DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), |
27975 | Op.getValue(1)}; |
27976 | return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); |
27977 | } |
27978 | |
27979 | static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG, |
27980 | const X86Subtarget &Subtarget) { |
27981 | MVT VT = Op.getSimpleValueType(); |
27982 | if (VT == MVT::i16 || VT == MVT::i32) |
27983 | return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); |
27984 | |
27985 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
27986 | return splitVectorIntBinary(Op, DAG); |
27987 | |
27988 | assert(Op.getSimpleValueType().is256BitVector() && |
27989 | Op.getSimpleValueType().isInteger() && |
27990 | "Only handle AVX 256-bit vector integer operation"); |
27991 | return splitVectorIntBinary(Op, DAG); |
27992 | } |
27993 | |
// Lower saturating add/subtract (UADDSAT/USUBSAT/SADDSAT/SSUBSAT) for the
// cases the target cannot handle natively: wide vectors are split, unsigned
// subtract is expanded via compare+select, and scalar/v2i64 signed forms are
// expanded via the overflow-producing add/sub nodes.
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  SDLoc DL(Op);

  // Split 512-bit i8/i16 vectors, and 256-bit vectors without AVX2 integer
  // support, into halves.
  if (VT == MVT::v32i16 || VT == MVT::v64i8 ||
      (VT.is256BitVector() && !Subtarget.hasInt256())) {
    assert(Op.getSimpleValueType().isInteger() &&
           "Only handle AVX vector integer operation");
    return splitVectorIntBinary(Op, DAG);
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT SetCCResultType =
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
    // usubsat(X, Y) == (X > Y) ? X - Y : 0.
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
    SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);

    // If the compare already produces an all-ones/all-zeros mask of the same
    // type, a plain AND is cheaper than a select.
    if (SetCCResultType == VT &&
        DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
      return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
    return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
  }

  if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
      (!VT.isVector() || VT == MVT::v2i64)) {
    // Expand via SADDO/SSUBO: if the operation overflowed, clamp to the
    // saturation bound; otherwise keep the raw sum/difference.
    unsigned BitWidth = VT.getScalarSizeInBits();
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Result =
        DAG.getNode(Opcode == ISD::SADDSAT ? ISD::SADDO : ISD::SSUBO, DL,
                    DAG.getVTList(VT, SetCCResultType), X, Y);
    SDValue SumDiff = Result.getValue(0);
    SDValue Overflow = Result.getValue(1);
    SDValue SatMin = DAG.getConstant(MinVal, DL, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, DL, VT);
    // On signed overflow the raw result's sign is inverted relative to the
    // mathematically correct value: a negative raw result means the true
    // result overflowed toward +inf (clamp to SatMax) and vice versa.
    SDValue SumNeg =
        DAG.getSetCC(DL, SetCCResultType, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(DL, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(DL, VT, Overflow, Result, SumDiff);
  }

  // Everything else is expected to be handled by generic legalization or by
  // native instructions.
  return SDValue();
}
28046 | |
28047 | static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, |
28048 | SelectionDAG &DAG) { |
28049 | MVT VT = Op.getSimpleValueType(); |
28050 | if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) { |
28051 | |
28052 | |
28053 | SDLoc DL(Op); |
28054 | SDValue N0 = Op.getOperand(0); |
28055 | SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), |
28056 | DAG.getConstant(0, DL, VT), N0); |
28057 | SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8), |
28058 | SDValue(Neg.getNode(), 1)}; |
28059 | return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); |
28060 | } |
28061 | |
28062 | |
28063 | if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) { |
28064 | SDLoc DL(Op); |
28065 | SDValue Src = Op.getOperand(0); |
28066 | SDValue Sub = |
28067 | DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src); |
28068 | return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src); |
28069 | } |
28070 | |
28071 | if (VT.is256BitVector() && !Subtarget.hasInt256()) { |
28072 | assert(VT.isInteger() && |
28073 | "Only handle AVX 256-bit vector integer operation"); |
28074 | return splitVectorIntUnary(Op, DAG); |
28075 | } |
28076 | |
28077 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) |
28078 | return splitVectorIntUnary(Op, DAG); |
28079 | |
28080 | |
28081 | return SDValue(); |
28082 | } |
28083 | |
28084 | static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { |
28085 | MVT VT = Op.getSimpleValueType(); |
28086 | |
28087 | |
28088 | if (VT.getScalarType() != MVT::i64 && VT.is256BitVector()) |
28089 | return splitVectorIntBinary(Op, DAG); |
28090 | |
28091 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
28092 | return splitVectorIntBinary(Op, DAG); |
28093 | |
28094 | |
28095 | return SDValue(); |
28096 | } |
28097 | |
// Custom lowering for integer vector multiply: i8 vectors are widened to i16
// (whole or via unpack halves), v4i32 without SSE4.1 uses PMULUDQ pairs, and
// 64-bit element vectors use the three-partial-product PMULUDQ expansion.
static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
                        SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  // Split 256-bit ops pre-AVX2 and 512-bit i8/i16 ops without BWI.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitVectorIntBinary(Op, DAG);

  if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
    return splitVectorIntBinary(Op, DAG);

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  // i8 vectors: there is no byte multiply, so compute in i16 lanes.
  if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
    unsigned NumElts = VT.getVectorNumElements();

    // If a full-width i16 extension fits in an available register size,
    // any-extend both operands, multiply, and truncate.
    if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
        (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
      MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
      return DAG.getNode(
          ISD::TRUNCATE, dl, VT,
          DAG.getNode(ISD::MUL, dl, ExVT,
                      DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A),
                      DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B)));
    }

    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    // Otherwise interleave each operand with undef to place the bytes into
    // i16 lanes (low/high halves separately), multiply, and repack.
    SDValue Undef = DAG.getUNDEF(VT);
    SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
    SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));

    SDValue BLo, BHi;
    if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
      // Constant RHS: build the unpacked i16 constant vectors directly
      // (mirroring the lane-wise unpackl/unpackh byte interleave order).
      SmallVector<SDValue, 16> LoOps, HiOps;
      for (unsigned i = 0; i != NumElts; i += 16) {
        for (unsigned j = 0; j != 8; ++j) {
          LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
                                               MVT::i16));
          HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
                                               MVT::i16));
        }
      }

      BLo = DAG.getBuildVector(ExVT, dl, LoOps);
      BHi = DAG.getBuildVector(ExVT, dl, HiOps);
    } else {
      BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
      BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
    }

    // Multiply in i16, keep only the low byte of each product, and PACKUS
    // the halves back into the original byte vector.
    SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
    SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
    RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
    RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT));
    return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
  }

  // v4i32 without SSE4.1 (no PMULLD): multiply even and odd element pairs
  // with PMULUDQ and recombine.
  if (VT == MVT::v4i32) {
    assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
           "Should not custom lower when pmulld is available!");

    // Move the odd elements into even positions (upper halves are don't-care
    // for PMULUDQ? NOTE(review): PMULUDQ only reads the even i32 lanes).
    static const int UnpackMask[] = { 1, -1, 3, -1 };
    SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
    SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);

    // Products of the even elements (lanes 0 and 2).
    SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
                                DAG.getBitcast(MVT::v2i64, A),
                                DAG.getBitcast(MVT::v2i64, B));
    // Products of the odd elements (lanes 1 and 3).
    SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
                               DAG.getBitcast(MVT::v2i64, Aodds),
                               DAG.getBitcast(MVT::v2i64, Bodds));

    Evens = DAG.getBitcast(VT, Evens);
    Odds = DAG.getBitcast(VT, Odds);

    // Interleave the low 32 bits of each product back into element order.
    static const int ShufMask[] = { 0, 4, 2, 6 };
    return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
  }

  assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
         "Only know how to lower V2I64/V4I64/V8I64 multiply");
  assert(!Subtarget.hasDQI() && "DQI should use MULLQ");

  // 64-bit element multiply via 32x32->64 partial products:
  //   A*B = AloBlo + ((AloBhi + AhiBlo) << 32)
  // Use known bits to skip partial products that are provably zero.
  KnownBits AKnown = DAG.computeKnownBits(A);
  KnownBits BKnown = DAG.computeKnownBits(B);

  APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
  bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero);
  bool BLoIsZero = LowerBitsMask.isSubsetOf(BKnown.Zero);

  APInt UpperBitsMask = APInt::getHighBitsSet(64, 32);
  bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
  bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);

  SDValue Zero = DAG.getConstant(0, dl, VT);

  // Low(A) * Low(B).
  SDValue AloBlo = Zero;
  if (!ALoIsZero && !BLoIsZero)
    AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);

  // Low(A) * High(B), shifted into position below.
  SDValue AloBhi = Zero;
  if (!ALoIsZero && !BHiIsZero) {
    SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
    AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
  }

  // High(A) * Low(B).
  SDValue AhiBlo = Zero;
  if (!AHiIsZero && !BLoIsZero) {
    SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
    AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
  }

  // Combine the cross terms into the upper half and add the low product.
  SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);
  Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG);

  return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi);
}
28242 | |
// Compute the high byte of an i8 vector multiply (MULHS/MULHU style) using
// unpack to place bytes into i16 lanes. For signed inputs the bytes are
// placed in the *high* half of each i16 lane (sign handled via MULHS); for
// unsigned inputs they are zero-padded into the low half. If \p Low is
// non-null, the low byte of each product is additionally returned through it.
static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
                                     MVT VT, bool IsSigned,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG,
                                     SDValue *Low = nullptr) {
  unsigned NumElts = VT.getVectorNumElements();

  // Widen each operand's bytes into i16 lanes, low and high halves
  // separately.
  MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
  SDValue Zero = DAG.getConstant(0, dl, VT);

  SDValue ALo, AHi;
  if (IsSigned) {
    // Signed: byte goes into the high half of the i16 lane (value << 8).
    ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A));
    AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));
  } else {
    // Unsigned: byte goes into the low half, zero-extended.
    ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));
    AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));
  }

  SDValue BLo, BHi;
  if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
    // Constant RHS: synthesize the unpacked i16 constants directly, matching
    // the per-16-byte-lane interleave order of unpackl/unpackh.
    SmallVector<SDValue, 16> LoOps, HiOps;
    for (unsigned i = 0; i != NumElts; i += 16) {
      for (unsigned j = 0; j != 8; ++j) {
        SDValue LoOp = B.getOperand(i + j);
        SDValue HiOp = B.getOperand(i + j + 8);

        if (IsSigned) {
          // Shift into the high half to match the signed unpack placement.
          LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16);
          HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16);
          LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp,
                             DAG.getConstant(8, dl, MVT::i16));
          HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp,
                             DAG.getConstant(8, dl, MVT::i16));
        } else {
          LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
          HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
        }

        LoOps.push_back(LoOp);
        HiOps.push_back(HiOp);
      }
    }

    BLo = DAG.getBuildVector(ExVT, dl, LoOps);
    BHi = DAG.getBuildVector(ExVT, dl, HiOps);
  } else if (IsSigned) {
    BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B));
    BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B));
  } else {
    BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero));
    BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero));
  }

  // Signed: (a<<8) *mulhs* (b<<8) yields the full signed a*b product in the
  // i16 lane. Unsigned: a plain i16 multiply of the zero-extended bytes.
  unsigned MulOpc = IsSigned ? ISD::MULHS : ISD::MUL;
  SDValue RLo = DAG.getNode(MulOpc, dl, ExVT, ALo, BLo);
  SDValue RHi = DAG.getNode(MulOpc, dl, ExVT, AHi, BHi);

  if (Low) {
    // Caller also wants the low byte of each product: mask and repack.
    SDValue Mask = DAG.getConstant(255, dl, ExVT);
    SDValue LLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, Mask);
    SDValue LHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, Mask);
    *Low = DAG.getNode(X86ISD::PACKUS, dl, VT, LLo, LHi);
  }

  // Extract the high byte of each i16 product and repack into VT.
  RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
  RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);

  return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
28330 | |
// Custom lowering for MULHS/MULHU (high half of a widening multiply).
// i32 vectors use PMUL(U)DQ on even/odd element pairs; i8 vectors are
// widened to i16 or handled via the unpack-based helper.
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  bool IsSigned = Op->getOpcode() == ISD::MULHS;
  unsigned NumElts = VT.getVectorNumElements();
  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  // Split 256-bit ops pre-AVX2 and 512-bit i8/i16 ops without BWI.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitVectorIntBinary(Op, DAG);

  if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
    return splitVectorIntBinary(Op, DAG);

  if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
    assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
           (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
           (VT == MVT::v16i32 && Subtarget.hasAVX512()));

    // Shuffle the odd elements into even positions so PMUL(U)DQ (which reads
    // the even i32 lane of each i64 lane) can produce the odd products.
    const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
                        9, -1, 11, -1, 13, -1, 15, -1};

    SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A,
                                        makeArrayRef(&Mask[0], NumElts));

    SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B,
                                        makeArrayRef(&Mask[0], NumElts));

    // Use the signed PMULDQ only when SSE4.1 provides it; otherwise use the
    // unsigned form and fix up the sign afterwards.
    MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
    unsigned Opcode =
        (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ;

    // 64-bit products of the even elements.
    SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
                                                  DAG.getBitcast(MulVT, A),
                                                  DAG.getBitcast(MulVT, B)));

    // 64-bit products of the odd elements.
    SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
                                                  DAG.getBitcast(MulVT, Odd0),
                                                  DAG.getBitcast(MulVT, Odd1)));

    // Gather the high 32 bits of every product back into element order.
    SmallVector<int, 16> ShufMask(NumElts);
    for (int i = 0; i != (int)NumElts; ++i)
      ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;

    SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);

    // Signed high-multiply from an unsigned one:
    // mulhs(a,b) = mulhu(a,b) - (a<0 ? b : 0) - (b<0 ? a : 0).
    if (IsSigned && !Subtarget.hasSSE41()) {
      SDValue Zero = DAG.getConstant(0, dl, VT);
      SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
                               DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
      SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
                               DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);

      SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
      Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
    }

    return Res;
  }

  // Only byte vectors remain.
  assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
          (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
         "Unsupported vector type");

  // If a full-width i16 extension fits in an available register size,
  // extend, multiply, take the high byte, and truncate.
  if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
      (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
    unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
    SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
    Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
  }

  // Otherwise widen via unpack halves.
  return LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG);
}
28436 | |
28437 | |
// Custom lowering for SMULO/UMULO (multiply with overflow flag). Scalars are
// deferred to LowerXALUO; i8 vectors compute the full product in i16 lanes
// and compare the high byte against the sign/zero pattern the low byte would
// imply if no overflow occurred.
static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  // Scalar overflow multiplies are handled elsewhere.
  if (!VT.isVector())
    return LowerXALUO(Op, DAG);

  SDLoc dl(Op);
  bool IsSigned = Op->getOpcode() == ISD::SMULO;
  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);
  EVT OvfVT = Op->getValueType(1);

  // Too-wide byte vectors: split operands and overflow type in half, lower
  // each half independently, and concatenate the results.
  if ((VT == MVT::v32i8 && !Subtarget.hasInt256()) ||
      (VT == MVT::v64i8 && !Subtarget.hasBWI())) {
    SDValue LHSLo, LHSHi;
    std::tie(LHSLo, LHSHi) = splitVector(A, DAG, dl);

    SDValue RHSLo, RHSHi;
    std::tie(RHSLo, RHSHi) = splitVector(B, DAG, dl);

    EVT LoOvfVT, HiOvfVT;
    std::tie(LoOvfVT, HiOvfVT) = DAG.GetSplitDestVTs(OvfVT);
    SDVTList LoVTs = DAG.getVTList(LHSLo.getValueType(), LoOvfVT);
    SDVTList HiVTs = DAG.getVTList(LHSHi.getValueType(), HiOvfVT);

    SDValue Lo = DAG.getNode(Op.getOpcode(), dl, LoVTs, LHSLo, RHSLo);
    SDValue Hi = DAG.getNode(Op.getOpcode(), dl, HiVTs, LHSHi, RHSHi);

    SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
    SDValue Ovf = DAG.getNode(ISD::CONCAT_VECTORS, dl, OvfVT, Lo.getValue(1),
                              Hi.getValue(1));

    return DAG.getMergeValues({Res, Ovf}, dl);
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT SetccVT =
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Full-width i16 extension path: the complete product of each byte pair is
  // available in an i16 lane, so overflow can be checked directly.
  if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
      (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
    unsigned NumElts = VT.getVectorNumElements();
    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
    unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
    SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);

    SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);

    SDValue Ovf;
    if (IsSigned) {
      // Signed overflow: the high byte must equal the sign-extension of the
      // low byte.
      SDValue High, LowSign;
      if (OvfVT.getVectorElementType() == MVT::i1 &&
          (Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) {
        // Compare directly into a mask register type, avoiding a truncate.
        // High byte (arithmetic shift keeps it comparable to LowSign).
        High = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Mul, 8, DAG);
        // Sign pattern of the low byte: shift it to the top, then
        // arithmetic-shift it back down across the whole lane.
        LowSign =
            getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExVT, Mul, 8, DAG);
        LowSign = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, LowSign,
                                             15, DAG);
        SetccVT = OvfVT;
        if (!Subtarget.hasBWI()) {
          // Without BWI there is no i16 mask compare; widen to i32 lanes.
          High = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, High);
          LowSign = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, LowSign);
        }
      } else {
        // Otherwise compare at the byte level.
        High = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
        High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);
        LowSign =
            DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));
      }

      Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);
    } else {
      // Unsigned overflow: the high byte must be zero.
      SDValue High =
          getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
      if (OvfVT.getVectorElementType() == MVT::i1 &&
          (Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) {
        // Compare directly into the mask register type.
        SetccVT = OvfVT;
        if (!Subtarget.hasBWI()) {
          // Without BWI, widen to i32 lanes for the mask compare.
          High = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, High);
        }
      } else {
        // Otherwise compare at the byte level.
        High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);
      }

      Ovf =
          DAG.getSetCC(dl, SetccVT, High,
                       DAG.getConstant(0, dl, High.getValueType()), ISD::SETNE);
    }

    // Normalize the overflow result to the node's declared overflow type.
    Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT);

    return DAG.getMergeValues({Low, Ovf}, dl);
  }

  // Unpack-based path: helper returns the high byte and fills in Low.
  SDValue Low;
  SDValue High =
      LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG, &Low);

  SDValue Ovf;
  if (IsSigned) {
    // Signed overflow iff the high byte differs from the low byte's sign.
    SDValue LowSign =
        DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));
    Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);
  } else {
    // Unsigned overflow iff the high byte is nonzero.
    Ovf =
        DAG.getSetCC(dl, SetccVT, High, DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT);

  return DAG.getMergeValues({Low, Ovf}, dl);
}
28568 | |
// Lower a 128-bit integer divide/remainder on Win64 to a runtime library
// call. The Win64 ABI passes i128 arguments indirectly, so each operand is
// spilled to an aligned stack slot and its address is passed instead; the
// result comes back in a vector register (see setInRegister below) and is
// bitcast to the original type.
SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget.isTargetWin64() && "Unexpected target");
  EVT VT = Op.getValueType();
  assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
         "Unexpected return type for lowering");

  // Select the libcall and its signedness from the node's opcode.
  RTLIB::Libcall LC;
  bool isSigned;
  switch (Op->getOpcode()) {
  default: llvm_unreachable("Unexpected request for libcall!");
  case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break;
  case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
  case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
  case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
  }

  SDLoc dl(Op);
  SDValue InChain = DAG.getEntryNode();

  // Spill each i128 operand to a 16-byte-aligned stack temporary and pass a
  // pointer to it, chaining the stores so they precede the call.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
    EVT ArgVT = Op->getOperand(i).getValueType();
    assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
           "Unexpected argument type for lowering");
    SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
    int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
    MachinePointerInfo MPI =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
    Entry.Node = StackPtr;
    InChain =
        DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16));
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Ty = PointerType::get(ArgTy,0);
    Entry.IsSExt = false;
    Entry.IsZExt = false;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  // Declare the call as returning v2i64 so the 128-bit result is modeled in
  // a vector register (setInRegister), then bitcast it back to VT.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(
          getLibcallCallingConv(LC),
          static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee,
          std::move(Args))
      .setInRegister()
      .setSExtResult(isSigned)
      .setZExtResult(!isSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return DAG.getBitcast(VT, CallInfo.first);
}
28625 | |
28626 | |
28627 | |
28628 | static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget, |
28629 | unsigned Opcode) { |
28630 | if (VT.getScalarSizeInBits() < 16) |
28631 | return false; |
28632 | |
28633 | if (VT.is512BitVector() && Subtarget.hasAVX512() && |
28634 | (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI())) |
28635 | return true; |
28636 | |
28637 | bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) || |
28638 | (VT.is256BitVector() && Subtarget.hasInt256()); |
28639 | |
28640 | bool AShift = LShift && (Subtarget.hasAVX512() || |
28641 | (VT != MVT::v2i64 && VT != MVT::v4i64)); |
28642 | return (Opcode == ISD::SRA) ? AShift : LShift; |
28643 | } |
28644 | |
28645 | |
28646 | |
28647 | static |
28648 | bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget, |
28649 | unsigned Opcode) { |
28650 | return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); |
28651 | } |
28652 | |
28653 | |
28654 | |
28655 | static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget, |
28656 | unsigned Opcode) { |
28657 | |
28658 | if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16) |
28659 | return false; |
28660 | |
28661 | |
28662 | if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI()) |
28663 | return false; |
28664 | |
28665 | if (Subtarget.hasAVX512()) |
28666 | return true; |
28667 | |
28668 | bool LShift = VT.is128BitVector() || VT.is256BitVector(); |
28669 | bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; |
28670 | return (Opcode == ISD::SRA) ? AShift : LShift; |
28671 | } |
28672 | |
// Lower a vector shift where the amount is a constant splat. Returns
// SDValue() when this routine has nothing better than the generic path.
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);

  // Emulate a 64-bit-element arithmetic shift (no native form before
  // AVX512) by combining 32-bit arithmetic/logical shifts on the
  // reinterpreted vector and reassembling lanes with shuffles.
  auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
    assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
    MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
    SDValue Ex = DAG.getBitcast(ExVT, R);

    // SRA by 63 yields all-ones for negative lanes, all-zeros otherwise:
    // exactly (0 > R) computed with a 64-bit PCMPGT (SSE4.2+).
    if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
      assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
             "Unsupported PCMPGT op");
      return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
    }

    if (ShiftAmt >= 32) {
      // Upper half becomes pure sign bits (SRA 31); lower half is the upper
      // half shifted by the remaining amount. Interleave the i32 halves back.
      SDValue Upper =
          getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
      SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
                                                 ShiftAmt - 32, DAG);
      if (VT == MVT::v2i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
      if (VT == MVT::v4i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
                                  {9, 1, 11, 3, 13, 5, 15, 7});
    } else {
      // ShiftAmt < 32: arithmetic-shift the upper i32 lanes, logical-shift
      // the whole 64-bit lanes, then pick upper halves from the former and
      // lower halves from the latter.
      SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
                                                 ShiftAmt, DAG);
      SDValue Lower =
          getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
      Lower = DAG.getBitcast(ExVT, Lower);
      if (VT == MVT::v2i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
      if (VT == MVT::v4i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
                                  {8, 1, 10, 3, 12, 5, 14, 7});
    }
    return DAG.getBitcast(VT, Ex);
  };

  // Only handle constant splat amounts here.
  APInt APIntShiftAmt;
  if (!X86::isConstantSplat(Amt, APIntShiftAmt))
    return SDValue();

  // Out-of-range shift amounts produce undefined results.
  if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
    return DAG.getUNDEF(VT);

  uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();

  // If the target has a native immediate shift for this type, use it.
  if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
    return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);

  // 64-bit-element SRA has no native form here; emulate it (XOP has its own
  // v2i64 arithmetic shift, hence the hasXOP exclusion).
  if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
       (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
      Op.getOpcode() == ISD::SRA)
    return ArithmeticShiftRight64(ShiftAmt);

  // vXi8 shifts: no native byte shifts exist, so emulate using wider i16
  // shifts plus masking.
  if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
      (Subtarget.hasBWI() && VT == MVT::v64i8)) {
    unsigned NumElts = VT.getVectorNumElements();
    MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    // SHL by 1 is just R + R. Freeze R first so both uses see the same
    // value even if R is undef/poison in some lanes.
    if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) {
      R = DAG.getNode(ISD::FREEZE, dl, VT, R);
      return DAG.getNode(ISD::ADD, dl, VT, R, R);
    }

    // SRA by 7 broadcasts the sign bit: compute (0 > R) per byte.
    if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
      SDValue Zeros = DAG.getConstant(0, dl, VT);
      if (VT.is512BitVector()) {
        // 512-bit byte compares go through a mask register and sign-extend.
        assert(VT == MVT::v64i8 && "Unexpected element type!");
        SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
        return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
      }
      return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
    }

    // XOP has native v16i8 shifts; let the generic path pick those up.
    if (VT == MVT::v16i8 && Subtarget.hasXOP())
      return SDValue();

    if (Op.getOpcode() == ISD::SHL) {
      // Shift the whole vector as i16 lanes, then mask off the bits that
      // leaked in from the adjacent byte.
      SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
                                               ShiftAmt, DAG);
      SHL = DAG.getBitcast(VT, SHL);
      APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt);
      return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT));
    }
    if (Op.getOpcode() == ISD::SRL) {
      // Same trick in the other direction: i16 logical shift + low-bit mask.
      SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
                                               ShiftAmt, DAG);
      SRL = DAG.getBitcast(VT, SRL);
      APInt Mask = APInt::getLowBitsSet(8, 8 - ShiftAmt);
      return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(Mask, dl, VT));
    }
    if (Op.getOpcode() == ISD::SRA) {
      // ashr(R, Amt) == lshr(R, Amt) sign-corrected: XOR in the shifted sign
      // bit position, then subtract it out ((x ^ m) - m sign-extends).
      SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
      SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
      Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
      Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
      return Res;
    }
    llvm_unreachable("Unknown shift opcode.");
  }

  return SDValue();
}
28803 | |
// Lower a vector shift where every lane uses the same non-constant amount
// (a splatted scalar). Returns SDValue() when no special lowering applies.
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
  unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);

  if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
    if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
      // Widen the scalar amount to the width the target shift node expects.
      MVT EltVT = VT.getVectorElementType();
      assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
      if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
      else if (EltVT.bitsLT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

      return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
    }

    // vXi8 with a uniform amount and no direct support: shift as i16 lanes
    // and mask away the bits that crossed byte boundaries.
    if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
         (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
         VT == MVT::v64i8) &&
        !Subtarget.hasXOP()) {
      unsigned NumElts = VT.getVectorNumElements();
      MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
      if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
        // SRA is built on top of SRL; SHL/SRL use themselves.
        unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
        unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

        // Build the per-byte keep-mask by shifting an all-ones vector by the
        // same amount; for right shifts, first realign so every byte lane
        // gets the mask pattern, then splat byte 0 across the vector.
        SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
        BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
                                      BaseShAmt, Subtarget, DAG);
        if (Opcode != ISD::SHL)
          BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
                                               8, DAG);
        BitMask = DAG.getBitcast(VT, BitMask);
        BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
                                       SmallVector<int, 64>(NumElts, 0));

        // Do the logical shift at i16 granularity and mask off cross-byte
        // leakage.
        SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
                                          DAG.getBitcast(ExtVT, R), BaseShAmt,
                                          Subtarget, DAG);
        Res = DAG.getBitcast(VT, Res);
        Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);

        if (Opcode == ISD::SRA) {
          // Sign-correct the logical result: shift a per-byte sign-bit
          // pattern (0x80 in each byte of the i16 constant) by the same
          // amount, then (x ^ m) - m sign-extends each byte.
          SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
          SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
                                         BaseShAmt, Subtarget, DAG);
          SignMask = DAG.getBitcast(VT, SignMask);
          Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
          Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
        }
        return Res;
      }
    }
  }

  // v2i64 whose amount is a bitcast of a narrower BUILD_VECTOR: if the
  // narrow elements repeat with period 64 bits, both i64 lanes shift by the
  // same amount and the uniform target shift can be used.
  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    Amt = Amt.getOperand(0);
    unsigned Ratio = 64 / Amt.getScalarValueSizeInBits();
    std::vector<SDValue> Vals(Ratio);
    for (unsigned i = 0; i != Ratio; ++i)
      Vals[i] = Amt.getOperand(i);
    // Verify every subsequent group of Ratio elements matches the first.
    for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) {
      for (unsigned j = 0; j != Ratio; ++j)
        if (Vals[j] != Amt.getOperand(i + j))
          return SDValue();
    }

    if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
      return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
  }
  return SDValue();
}
28890 | |
28891 | |
// Convert a vector of left-shift amounts into a vector of scale factors
// (1 << amt per lane) so that shl can be lowered as a multiply. Returns
// SDValue() for unsupported types or amount forms.
static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  MVT VT = Amt.getSimpleValueType();
  // Only types where a vector multiply is profitable/available.
  if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
        (Subtarget.hasInt256() && VT == MVT::v16i16) ||
        (Subtarget.hasVBMI2() && VT == MVT::v32i16) ||
        (!Subtarget.hasAVX512() && VT == MVT::v16i8)))
    return SDValue();

  // Constant amounts: build the (1 << amt) vector directly. Out-of-range
  // amounts yield undef lanes.
  if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
    SmallVector<SDValue, 8> Elts;
    MVT SVT = VT.getVectorElementType();
    unsigned SVTBits = SVT.getSizeInBits();
    APInt One(SVTBits, 1);
    unsigned NumElems = VT.getVectorNumElements();

    for (unsigned i = 0; i != NumElems; ++i) {
      SDValue Op = Amt->getOperand(i);
      if (Op->isUndef()) {
        Elts.push_back(Op);
        continue;
      }

      ConstantSDNode *ND = cast<ConstantSDNode>(Op);
      APInt C(SVTBits, ND->getZExtValue());
      uint64_t ShAmt = C.getZExtValue();
      if (ShAmt >= SVTBits) {
        Elts.push_back(DAG.getUNDEF(SVT));
        continue;
      }
      Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT));
    }
    return DAG.getBuildVector(VT, dl, Elts);
  }

  // Variable v4i32 amounts: compute 2^amt via float bit tricks — place amt
  // in the f32 exponent field (shift by 23, add the bias 0x3f800000 == 1.0f)
  // and convert back to integer.
  if (VT == MVT::v4i32) {
    Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt,
                      DAG.getConstant(0x3f800000U, dl, VT));
    Amt = DAG.getBitcast(MVT::v4f32, Amt);
    return DAG.getNode(ISD::FP_TO_SINT, dl, VT, Amt);
  }

  // Variable v8i16 amounts without AVX2: zero-extend the halves to v4i32,
  // recurse to use the float trick, and pack the two results back to i16.
  if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
    SDValue Z = DAG.getConstant(0, dl, VT);
    SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
    SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
    Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
    Hi = convertShiftLeftToScale(Hi, dl, Subtarget, DAG);
    if (Subtarget.hasSSE41())
      return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);

    return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo),
                                DAG.getBitcast(VT, Hi),
                                {0, 2, 4, 6, 8, 10, 12, 14});
  }

  return SDValue();
}
28955 | |
// Custom-lower vector SHL/SRL/SRA. Tries, in order: constant-splat and
// uniform-amount fast paths, native variable shifts, XOP shifts, and then a
// sequence of per-type emulation strategies.
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());

  unsigned Opc = Op.getOpcode();
  unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true);
  unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false);

  assert(VT.isVector() && "Custom lowering only for vector shifts!");
  assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");

  // Constant-splat amounts.
  if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
    return V;

  // Uniform (splatted) non-constant amounts.
  if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
    return V;

  // Native per-element variable shifts: leave the node as-is for isel.
  if (SupportedVectorVarShift(VT, Subtarget, Opc))
    return Op;

  // XOP variable shifts: VPSHL/VPSHA shift left for positive amounts and
  // right for negative ones, so negate the amount for right shifts.
  if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
                             VT == MVT::v8i16 || VT == MVT::v16i8)) {
    if (Opc == ISD::SRL || Opc == ISD::SRA) {
      SDValue Zero = DAG.getConstant(0, dl, VT);
      Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
    }
    if (Opc == ISD::SHL || Opc == ISD::SRL)
      return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
    if (Opc == ISD::SRA)
      return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
  }

  // v2i64 SHL/SRL with two different amounts: shift by each splatted amount
  // separately and blend lane 0 of the first with lane 1 of the second.
  if (VT == MVT::v2i64 && Opc != ISD::SRA) {
    SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
    SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
    SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
    SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
    return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
  }

  // 64-bit-element SRA from SRL: ashr(R, A) == (lshr(R, A) ^ M) - M where
  // M = lshr(signmask, A) marks the shifted-in sign-bit position.
  if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
      Opc == ISD::SRA) {
    SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
    SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
    R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
    R = DAG.getNode(ISD::XOR, dl, VT, R, M);
    R = DAG.getNode(ISD::SUB, dl, VT, R, M);
    return R;
  }

  // If the constant amounts take at most two distinct values, perform both
  // uniform shifts and blend the results with a shuffle.
  if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
                      (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
    SDValue Amt1, Amt2;
    unsigned NumElts = VT.getVectorNumElements();
    SmallVector<int, 8> ShuffleMask;
    // Assign each lane to Amt1 or Amt2; bail (mask stays short) on a third
    // distinct amount.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue A = Amt->getOperand(i);
      if (A.isUndef()) {
        ShuffleMask.push_back(SM_SentinelUndef);
        continue;
      }
      if (!Amt1 || Amt1 == A) {
        ShuffleMask.push_back(i);
        Amt1 = A;
        continue;
      }
      if (!Amt2 || Amt2 == A) {
        ShuffleMask.push_back(i + NumElts);
        Amt2 = A;
        continue;
      }
      break;
    }

    // Only worthwhile when the blend shuffle itself is cheap on this target
    // (lane-repeated for v16i16; widenable or SSE41 for v8i16 SHL).
    if (ShuffleMask.size() == NumElts && Amt1 && Amt2 &&
        (VT != MVT::v16i16 ||
         is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
        (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||
         canWidenShuffleElements(ShuffleMask))) {
      auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1);
      auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2);
      if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) &&
          Cst2->getAPIntValue().ult(EltSizeInBits)) {
        SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
                                                    Cst1->getZExtValue(), DAG);
        SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
                                                    Cst2->getZExtValue(), DAG);
        return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
      }
    }
  }

  // SHL as a multiply by per-lane powers of two.
  if (Opc == ISD::SHL)
    if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
      return DAG.getNode(ISD::MUL, dl, VT, R, Scale);

  // Constant SRL as MULHU by 2^(bits - amt); a zero amount would need
  // 2^bits which doesn't fit, so select the unshifted input for those lanes.
  if (Opc == ISD::SRL && ConstantAmt &&
      (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
    SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
    SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
    if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
      SDValue Zero = DAG.getConstant(0, dl, VT);
      SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ);
      SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale);
      return DAG.getSelect(dl, VT, ZAmt, R, Res);
    }
  }

  // Constant SRA as MULHS by 2^(bits - amt). Amounts of 0 and 1 are special
  // cased (0 keeps R; 1 uses a direct SRA-by-1) and patched in via selects.
  if (Opc == ISD::SRA && ConstantAmt &&
      (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) &&
      ((Subtarget.hasSSE41() && !Subtarget.hasXOP() &&
        !Subtarget.hasAVX512()) ||
       DAG.isKnownNeverZero(Amt))) {
    SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
    SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
    if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
      SDValue Amt0 =
          DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ);
      SDValue Amt1 =
          DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ);
      SDValue Sra1 =
          getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG);
      SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale);
      Res = DAG.getSelect(dl, VT, Amt0, R, Res);
      return DAG.getSelect(dl, VT, Amt1, Sra1, Res);
    }
  }

  // v4i32 fallback: extract/splat the four shift amounts into lane 0, do
  // four uniform shifts, and merge the results with shuffles.
  if (VT == MVT::v4i32) {
    SDValue Amt0, Amt1, Amt2, Amt3;
    if (ConstantAmt) {
      // Constant amounts can be broadcast with plain splat shuffles.
      Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
      Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
      Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
      Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
    } else {
      // Variable amounts feed XMM-count shifts, which read a 64-bit count
      // from the low quadword — so zero the adjacent element too.
      if (Subtarget.hasAVX()) {
        SDValue Z = DAG.getConstant(0, dl, VT);
        Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
        Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
        Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
        Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
      } else {
        // Pre-AVX: build the four counts with i16 shuffles instead of
        // zero-blends (cheaper without VPBLENDW-style ops).
        SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt);
        SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
                                             {4, 5, 6, 7, -1, -1, -1, -1});
        Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
                                    {0, 1, 1, 1, -1, -1, -1, -1});
        Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
                                    {2, 3, 3, 3, -1, -1, -1, -1});
        Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23,
                                    {0, 1, 1, 1, -1, -1, -1, -1});
        Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23,
                                    {2, 3, 3, 3, -1, -1, -1, -1});
      }
    }

    // Constant splats use the generic opcode; variable counts use the
    // XMM-count target opcode.
    unsigned ShOpc = ConstantAmt ? Opc : X86OpcV;
    SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));
    SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));
    SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));
    SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));

    // Gather lane i from result Ri.
    if (Subtarget.hasSSE41()) {
      SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
      SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
      return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
    }
    SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5});
    SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7});
    return DAG.getVectorShuffle(VT, dl, R01, R23, {0, 3, 4, 7});
  }

  // Widen to a type with native variable shifts, shift, and truncate back.
  if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
      (Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) ||
      (Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) ||
      (Subtarget.canExtendTo512BW() && VT == MVT::v32i8) ||
      (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) {
    assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) &&
           "Unexpected vector type");
    MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
    MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
    // SRA needs the sign preserved through the extension.
    unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    R = DAG.getNode(ExtOpc, dl, ExtVT, R);
    Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt);
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, ExtVT, R, Amt));
  }

  // Constant vXi8 right shifts via i16 multiplies: r >> a computed as
  // ((r' << 8) * 2^(8-a)) >> 8 in i16 lanes, where r' extends each byte
  // with the appropriate (sign/zero) high half.
  if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) &&
      (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
       (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
      !Subtarget.hasXOP()) {
    int NumElts = VT.getVectorNumElements();
    SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);

    // Precompute the per-lane multiplier 1 << (8 - amt) as i16 constants;
    // constant folding collapses these nodes to a constant build vector.
    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
    Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT);
    Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt);
    Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt);
    assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) &&
           "Constant build vector expected");

    // v16i8 with AVX2: extend to v16i16, multiply, shift down, truncate.
    if (VT == MVT::v16i8 && Subtarget.hasInt256()) {
      R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT)
                          : DAG.getZExtOrTrunc(R, dl, ExVT);
      R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt);
      R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8);
      return DAG.getZExtOrTrunc(R, dl, VT);
    }

    // Otherwise split each 128-bit chunk into lo/hi byte halves, matching
    // the unpack interleaving of R below.
    SmallVector<SDValue, 16> LoAmt, HiAmt;
    for (int i = 0; i != NumElts; i += 16) {
      for (int j = 0; j != 8; ++j) {
        LoAmt.push_back(Amt.getOperand(i + j));
        HiAmt.push_back(Amt.getOperand(i + j + 8));
      }
    }

    MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2);
    SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
    SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);

    // Unpack R with itself to place each byte in the high half of an i16
    // lane, extend it down with the opcode's shift (SRA/SRL by 8), multiply
    // by the scale, and shift the product back to the low byte.
    SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));
    SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));
    LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8);
    HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8);
    LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA);
    HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA);
    LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8);
    HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8);
    return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
  }

  // vXi8 variable shifts: conditional-shift ladder. Shift by 4, 2, 1 and
  // select each step based on successive bits of the amount, exposed as the
  // lane's sign bit.
  if (VT == MVT::v16i8 ||
      (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
      (VT == MVT::v64i8 && Subtarget.hasBWI())) {
    MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);

    // Select V0 where Sel's per-byte sign bit is set, else V1, using the
    // cheapest mechanism available: AVX512 mask compare, SSE4.1 PBLENDVB,
    // or a PCMPGT-based select.
    auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
      if (VT.is512BitVector()) {
        MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
        V0 = DAG.getBitcast(VT, V0);
        V1 = DAG.getBitcast(VT, V1);
        Sel = DAG.getBitcast(VT, Sel);
        // (0 > Sel) is true exactly when the sign bit is set.
        Sel = DAG.getSetCC(dl, MaskVT, DAG.getConstant(0, dl, VT), Sel,
                           ISD::SETGT);
        return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
      } else if (Subtarget.hasSSE41()) {
        // BLENDV selects on the per-byte sign bit directly.
        V0 = DAG.getBitcast(VT, V0);
        V1 = DAG.getBitcast(VT, V1);
        Sel = DAG.getBitcast(VT, Sel);
        return DAG.getBitcast(SelVT,
                              DAG.getNode(X86ISD::BLENDV, dl, VT, Sel, V0, V1));
      }
      SDValue Z = DAG.getConstant(0, dl, SelVT);
      SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
      return DAG.getSelect(dl, SelVT, C, V0, V1);
    };

    // Move amount bit 2 into the sign-bit position of each byte (the i8
    // amount occupies the low 3 bits; shifting the i16 view left by 5 puts
    // bit 2 at bit 7 of each byte).
    Amt = DAG.getBitcast(ExtVT, Amt);
    Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG);
    Amt = DAG.getBitcast(VT, Amt);

    if (Opc == ISD::SHL || Opc == ISD::SRL) {
      // Step 1: conditionally shift by 4 (amount bit 2).
      SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
      R = SignBitSelect(VT, Amt, M, R);

      // Advance the next amount bit into the sign-bit position.
      Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

      // Step 2: conditionally shift by 2 (amount bit 1).
      M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));
      R = SignBitSelect(VT, Amt, M, R);

      Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

      // Step 3: conditionally shift by 1 (amount bit 0).
      M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));
      R = SignBitSelect(VT, Amt, M, R);
      return R;
    }

    if (Opc == ISD::SRA) {
      // SRA: same ladder, but run in i16 lanes with each byte placed in the
      // high half (via unpack with undef) so VSRAI sees the correct sign.
      SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
      SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
      SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R);
      SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R);
      ALo = DAG.getBitcast(ExtVT, ALo);
      AHi = DAG.getBitcast(ExtVT, AHi);
      RLo = DAG.getBitcast(ExtVT, RLo);
      RHi = DAG.getBitcast(ExtVT, RHi);

      // Conditional shift by 4.
      SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG);
      SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG);
      RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
      RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);

      // Advance to the next amount bit.
      ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
      AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);

      // Conditional shift by 2.
      MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG);
      MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG);
      RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
      RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);

      ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
      AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);

      // Conditional shift by 1.
      MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG);
      MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG);
      RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
      RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);

      // Move the result bytes back to the low halves and repack to vXi8.
      RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG);
      RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG);
      return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
    }
  }

  // v16i16 on AVX2 without native i16 variable shifts: unpack to i32 lanes
  // (data in the high half so SRA keeps the sign), do the i32 variable
  // shift, shift the result back down, and repack.
  if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
    MVT ExtVT = MVT::v8i32;
    SDValue Z = DAG.getConstant(0, dl, VT);
    SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z);
    SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z);
    SDValue RLo = getUnpackl(DAG, dl, VT, Z, R);
    SDValue RHi = getUnpackh(DAG, dl, VT, Z, R);
    ALo = DAG.getBitcast(ExtVT, ALo);
    AHi = DAG.getBitcast(ExtVT, AHi);
    RLo = DAG.getBitcast(ExtVT, RLo);
    RHi = DAG.getBitcast(ExtVT, RHi);
    SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
    SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
    Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG);
    Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG);
    return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
  }

  // v8i16 fallback: conditional-shift ladder over amount bits 3..0, with
  // the deciding bit moved into the sign-bit position of each lane.
  if (VT == MVT::v8i16) {
    // With SSE4.1 and a non-constant amount, BLENDV on byte sign bits is
    // the cheapest select; otherwise use a VSRAI(15)-based select.
    bool UseSSE41 = Subtarget.hasSSE41() &&
                    !ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());

    auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
      if (UseSSE41) {
        MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
        V0 = DAG.getBitcast(ExtVT, V0);
        V1 = DAG.getBitcast(ExtVT, V1);
        Sel = DAG.getBitcast(ExtVT, Sel);
        return DAG.getBitcast(
            VT, DAG.getNode(X86ISD::BLENDV, dl, ExtVT, Sel, V0, V1));
      }
      // Broadcast the i16 sign bit across the lane and select.
      SDValue C =
          getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG);
      return DAG.getSelect(dl, VT, C, V0, V1);
    };

    if (UseSSE41) {
      // For BLENDV both bytes of each i16 lane must carry the deciding bit:
      // shl-4 puts amount bit 3 at bit 7 of the high byte, shl-12 puts it at
      // bit 7 of the low byte (as seen through the i8 view).
      Amt = DAG.getNode(
          ISD::OR, dl, VT,
          getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG),
          getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG));
    } else {
      // Only the i16 sign bit matters: put amount bit 3 at bit 15.
      Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG);
    }

    // Conditional shift by 8 (amount bit 3).
    SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);
    R = SignBitSelect(Amt, M, R);

    // Advance to the next amount bit.
    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

    // Conditional shift by 4 (amount bit 2).
    M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG);
    R = SignBitSelect(Amt, M, R);

    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

    // Conditional shift by 2 (amount bit 1).
    M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG);
    R = SignBitSelect(Amt, M, R);

    Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);

    // Conditional shift by 1 (amount bit 0).
    M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG);
    R = SignBitSelect(Amt, M, R);
    return R;
  }

  // Remaining 256-bit cases: split into two 128-bit ops.
  if (VT.is256BitVector())
    return splitVectorIntBinary(Op, DAG);

  // Remaining 512-bit cases: split into two 256-bit ops.
  if (VT == MVT::v32i16 || VT == MVT::v64i8)
    return splitVectorIntBinary(Op, DAG);

  return SDValue();
}
29441 | |
// Lower a vector ISD::ROTL/ISD::ROTR, picking the cheapest strategy the
// subtarget supports: immediate/variable rotate instructions (AVX512/XOP),
// funnel shifts (VBMI2), shift+or expansion, or multiply-based tricks.
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Custom lowering only for vector rotates!");

  SDLoc DL(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  int NumElts = VT.getVectorNumElements();

  // Check for a constant splat rotation amount.
  APInt CstSplatValue;
  bool IsCstSplat = X86::isConstantSplat(Amt, CstSplatValue);

  // Rotating by a multiple of the element width is a no-op.
  if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
    return R;

  // AVX512 has native vXi32/vXi64 rotates (modulo rotation amounts).
  if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
    // Attempt to rotate by immediate.
    if (IsCstSplat) {
      unsigned RotOpc = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
      uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
      return DAG.getNode(RotOpc, DL, VT, R,
                         DAG.getTargetConstant(RotAmt, DL, MVT::i8));
    }

    // Else, fall back on the variable VPROLV/VPRORV patterns.
    return Op;
  }

  // AVX512 VBMI2 vXi16 - lower to funnel shifts (R rotated into itself).
  if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
    unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR);
    return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
  }

  // Everything below this point only handles left rotates; ROTR is expected
  // to have been canonicalized to ROTL already.
  assert((Opcode == ISD::ROTL) && "Only ROTL supported");

  // XOP has 128-bit vector variable + immediate rotates.
  // XOP implicitly uses modulo rotation amounts.
  if (Subtarget.hasXOP()) {
    if (VT.is256BitVector())
      return splitVectorIntBinary(Op, DAG);
    assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");

    // Attempt to rotate by immediate.
    if (IsCstSplat) {
      uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
      return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
                         DAG.getTargetConstant(RotAmt, DL, MVT::i8));
    }

    // Use general rotate by variable (per-element).
    return Op;
  }

  // Split 256-bit integers on pre-AVX2 targets.
  if (VT.is256BitVector() && !Subtarget.hasAVX2())
    return splitVectorIntBinary(Op, DAG);

  assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
          ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8 ||
            VT == MVT::v32i16) &&
           Subtarget.hasAVX2())) &&
         "Only vXi32/vXi16/vXi8 vector rotates supported");

  // Rotate by a uniform constant - let the generic expansion turn this
  // back into a pair of shifts.
  if (IsCstSplat)
    return SDValue();

  bool IsSplatAmt = DAG.isSplatValue(Amt);

  // v16i8/v32i8 with non-splat amounts: decompose into rot4/rot2/rot1 stages,
  // selecting each stage with the successive bits of the (shifted-up) amount.
  if (EltSizeInBits == 8 && !IsSplatAmt) {
    // Constant (non-splat) byte amounts are better handled elsewhere.
    if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
      return SDValue();

    // i16 view of the byte vector, used to shift the amounts safely below.
    MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    // Select V0 where the sign bit of Sel is set, else V1.
    auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
      if (Subtarget.hasSSE41()) {
        // On SSE41 targets we can use PBLENDVB which selects bytes based
        // just on the sign bit of each lane.
        V0 = DAG.getBitcast(VT, V0);
        V1 = DAG.getBitcast(VT, V1);
        Sel = DAG.getBitcast(VT, Sel);
        return DAG.getBitcast(SelVT,
                              DAG.getNode(X86ISD::BLENDV, DL, VT, Sel, V0, V1));
      }
      // On pre-SSE41 targets we materialize the sign-bit mask by comparing
      // against zero (negative lanes become all-ones) and use a VSELECT.
      SDValue Z = DAG.getConstant(0, DL, SelVT);
      SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
      return DAG.getSelect(DL, SelVT, C, V0, V1);
    };

    // Move the low 3 amount bits up to the byte sign-bit position: a <<= 5.
    // The shift is done as i16 since there is no vXi8 shift; only the low
    // 3 bits of each byte matter so the cross-byte bleed is harmless.
    Amt = DAG.getBitcast(ExtVT, Amt);
    Amt = DAG.getNode(ISD::SHL, DL, ExtVT, Amt, DAG.getConstant(5, DL, ExtVT));
    Amt = DAG.getBitcast(VT, Amt);

    // r = select(sign(a), rot(r, 4), r)
    SDValue M;
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(4, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(4, DL, VT)));
    R = SignBitSelect(VT, Amt, M, R);

    // a += a (exposes the next amount bit in the sign position)
    Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);

    // r = select(sign(a), rot(r, 2), r)
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(2, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(6, DL, VT)));
    R = SignBitSelect(VT, Amt, M, R);

    // a += a
    Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);

    // return select(sign(a), rot(r, 1), r)
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(1, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(7, DL, VT)));
    return SignBitSelect(VT, Amt, M, R);
  }

  // ISD::ROT* uses modulo rotate amounts - reduce them here. If the amount is
  // a splat, do the modulo on the scalar BEFORE re-splatting so later code
  // can still recognize the splat.
  if (SDValue BaseRotAmt = DAG.getSplatValue(Amt)) {
    Amt = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, BaseRotAmt);
    Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
                      DAG.getConstant(EltSizeInBits - 1, DL, VT));
    Amt = DAG.getVectorShuffle(VT, DL, Amt, DAG.getUNDEF(VT),
                               SmallVector<int>(NumElts, 0));
  } else {
    Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
                      DAG.getConstant(EltSizeInBits - 1, DL, VT));
  }

  bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
  bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
                        SupportedVectorVarShift(VT, Subtarget, ISD::SRL);

  // Fallback for splat amounts and targets with legal variable shifts, and
  // for non-constant amounts on AVX2: rot(x, a) == (x << a) | (x >> (w - a)).
  if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
    SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
    AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
    SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
    SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
    return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
  }

  // As with shifts, convert the rotation amount to a multiplication factor
  // (2^amt per element).
  SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
  assert(Scale && "Failed to convert ROTL amount to scale");

  // v8i16/v16i16: rotate via unsigned multiply - the low half of the product
  // is the left shift, the high half is the wrapped-around right shift.
  if (EltSizeInBits == 16) {
    SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale);
    SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale);
    return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
  }

  // v4i32: use PMULUDQ, which multiplies the even i32 lanes into full v2i64
  // products. Shuffle the odd lanes down so a second PMULUDQ covers them,
  // then recombine the low/high 32-bit halves of each product with OR.
  assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected");
  static const int OddMask[] = {1, -1, 3, -1};
  SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask);
  SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask);

  SDValue Res02 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
                              DAG.getBitcast(MVT::v2i64, R),
                              DAG.getBitcast(MVT::v2i64, Scale));
  SDValue Res13 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
                              DAG.getBitcast(MVT::v2i64, R13),
                              DAG.getBitcast(MVT::v2i64, Scale13));
  Res02 = DAG.getBitcast(VT, Res02);
  Res13 = DAG.getBitcast(VT, Res13);

  // Gather the low halves (shifted-left bits) and high halves (wrapped bits)
  // of all four products and OR them together.
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getVectorShuffle(VT, DL, Res02, Res13, {0, 4, 2, 6}),
                     DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));
}
29642 | |
29643 | |
29644 | |
29645 | |
29646 | |
29647 | bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { |
29648 | unsigned OpWidth = MemType->getPrimitiveSizeInBits(); |
29649 | |
29650 | if (OpWidth == 64) |
29651 | return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit(); |
29652 | if (OpWidth == 128) |
29653 | return Subtarget.hasCmpxchg16b(); |
29654 | |
29655 | return false; |
29656 | } |
29657 | |
29658 | bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
29659 | Type *MemType = SI->getValueOperand()->getType(); |
29660 | |
29661 | bool NoImplicitFloatOps = |
29662 | SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); |
29663 | if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && |
29664 | !Subtarget.useSoftFloat() && !NoImplicitFloatOps && |
29665 | (Subtarget.hasSSE1() || Subtarget.hasX87())) |
29666 | return false; |
29667 | |
29668 | return needsCmpXchgNb(MemType); |
29669 | } |
29670 | |
29671 | |
29672 | |
29673 | TargetLowering::AtomicExpansionKind |
29674 | X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
29675 | Type *MemType = LI->getType(); |
29676 | |
29677 | |
29678 | |
29679 | |
29680 | bool NoImplicitFloatOps = |
29681 | LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); |
29682 | if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && |
29683 | !Subtarget.useSoftFloat() && !NoImplicitFloatOps && |
29684 | (Subtarget.hasSSE1() || Subtarget.hasX87())) |
29685 | return AtomicExpansionKind::None; |
29686 | |
29687 | return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg |
29688 | : AtomicExpansionKind::None; |
29689 | } |
29690 | |
29691 | TargetLowering::AtomicExpansionKind |
29692 | X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
29693 | unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; |
29694 | Type *MemType = AI->getType(); |
29695 | |
29696 | |
29697 | |
29698 | if (MemType->getPrimitiveSizeInBits() > NativeWidth) { |
29699 | return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg |
29700 | : AtomicExpansionKind::None; |
29701 | } |
29702 | |
29703 | AtomicRMWInst::BinOp Op = AI->getOperation(); |
29704 | switch (Op) { |
29705 | default: |
29706 | llvm_unreachable("Unknown atomic operation"); |
29707 | case AtomicRMWInst::Xchg: |
29708 | case AtomicRMWInst::Add: |
29709 | case AtomicRMWInst::Sub: |
29710 | |
29711 | return AtomicExpansionKind::None; |
29712 | case AtomicRMWInst::Or: |
29713 | case AtomicRMWInst::And: |
29714 | case AtomicRMWInst::Xor: |
29715 | |
29716 | |
29717 | return !AI->use_empty() ? AtomicExpansionKind::CmpXChg |
29718 | : AtomicExpansionKind::None; |
29719 | case AtomicRMWInst::Nand: |
29720 | case AtomicRMWInst::Max: |
29721 | case AtomicRMWInst::Min: |
29722 | case AtomicRMWInst::UMax: |
29723 | case AtomicRMWInst::UMin: |
29724 | case AtomicRMWInst::FAdd: |
29725 | case AtomicRMWInst::FSub: |
29726 | |
29727 | |
29728 | return AtomicExpansionKind::CmpXChg; |
29729 | } |
29730 | } |
29731 | |
// Rewrite an idempotent atomicrmw (e.g. `atomicrmw or %p, 0`) into an
// mfence followed by a plain atomic load, which preserves the ordering
// semantics while avoiding the locked RMW. Returns the replacement load,
// or nullptr if the transform doesn't apply.
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
  Type *MemType = AI->getType();

  // Wider-than-native operands go down the cmpxchg8b/16b or libcall path
  // instead - nothing to do here.
  if (MemType->getPrimitiveSizeInBits() > NativeWidth)
    return nullptr;

  // A canonical idempotent atomicrmw (or with 0) whose result is unused has
  // a better lowering elsewhere (a locked no-op), so leave it alone.
  if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
    if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
        AI->use_empty())
      return nullptr;

  IRBuilder<> Builder(AI);
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  auto SSID = AI->getSyncScopeID();
  // Use the strongest *failure* ordering for the load so we never try to
  // create a load with a Release/AcquireRelease ordering (invalid on loads).
  auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());

  // For a single-threaded scope only compiler ordering is needed; we have no
  // way to emit a pure compiler barrier at the IR level, so bail out and let
  // the normal lowering handle it.
  if (SSID == SyncScope::SingleThread)
    return nullptr;

  // Without MFENCE we cannot provide the required hardware fence before the
  // load, so keep the original atomicrmw lowering.
  if (!Subtarget.hasMFence())
    return nullptr;

  // Emit the fence, then the (ordered) atomic load that replaces the RMW.
  Function *MFence =
      llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
  Builder.CreateCall(MFence, {});

  LoadInst *Loaded = Builder.CreateAlignedLoad(
      AI->getType(), AI->getPointerOperand(), AI->getAlign());
  Loaded->setAtomic(Order, SSID);
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return Loaded;
}
29797 | |
29798 | bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { |
29799 | if (!SI.isUnordered()) |
29800 | return false; |
29801 | return ExperimentalUnorderedISEL; |
29802 | } |
29803 | bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { |
29804 | if (!LI.isUnordered()) |
29805 | return false; |
29806 | return ExperimentalUnorderedISEL; |
29807 | } |
29808 | |
29809 | |
29810 | |
29811 | |
29812 | |
29813 | |
29814 | static SDValue emitLockedStackOp(SelectionDAG &DAG, |
29815 | const X86Subtarget &Subtarget, SDValue Chain, |
29816 | const SDLoc &DL) { |
29817 | |
29818 | |
29819 | |
29820 | |
29821 | |
29822 | |
29823 | |
29824 | |
29825 | |
29826 | |
29827 | |
29828 | |
29829 | |
29830 | |
29831 | |
29832 | |
29833 | |
29834 | |
29835 | |
29836 | |
29837 | |
29838 | |
29839 | |
29840 | |
29841 | auto &MF = DAG.getMachineFunction(); |
29842 | auto &TFL = *Subtarget.getFrameLowering(); |
29843 | const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0; |
29844 | |
29845 | if (Subtarget.is64Bit()) { |
29846 | SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); |
29847 | SDValue Ops[] = { |
29848 | DAG.getRegister(X86::RSP, MVT::i64), |
29849 | DAG.getTargetConstant(1, DL, MVT::i8), |
29850 | DAG.getRegister(0, MVT::i64), |
29851 | DAG.getTargetConstant(SPOffset, DL, MVT::i32), |
29852 | DAG.getRegister(0, MVT::i16), |
29853 | Zero, |
29854 | Chain}; |
29855 | SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, |
29856 | MVT::Other, Ops); |
29857 | return SDValue(Res, 1); |
29858 | } |
29859 | |
29860 | SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); |
29861 | SDValue Ops[] = { |
29862 | DAG.getRegister(X86::ESP, MVT::i32), |
29863 | DAG.getTargetConstant(1, DL, MVT::i8), |
29864 | DAG.getRegister(0, MVT::i32), |
29865 | DAG.getTargetConstant(SPOffset, DL, MVT::i32), |
29866 | DAG.getRegister(0, MVT::i16), |
29867 | Zero, |
29868 | Chain |
29869 | }; |
29870 | SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, |
29871 | MVT::Other, Ops); |
29872 | return SDValue(Res, 1); |
29873 | } |
29874 | |
29875 | static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, |
29876 | SelectionDAG &DAG) { |
29877 | SDLoc dl(Op); |
29878 | AtomicOrdering FenceOrdering = |
29879 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); |
29880 | SyncScope::ID FenceSSID = |
29881 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); |
29882 | |
29883 | |
29884 | |
29885 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
29886 | FenceSSID == SyncScope::System) { |
29887 | if (Subtarget.hasMFence()) |
29888 | return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); |
29889 | |
29890 | SDValue Chain = Op.getOperand(0); |
29891 | return emitLockedStackOp(DAG, Subtarget, Chain, dl); |
29892 | } |
29893 | |
29894 | |
29895 | return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); |
29896 | } |
29897 | |
// Lower ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS to the LCMPXCHG pseudo: the
// expected value is pinned to AL/AX/EAX/RAX, the instruction is emitted as
// a memory intrinsic, and the old value + a ZF-based success flag are
// copied back out, glued so the flags aren't clobbered in between.
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
  MVT T = Op.getSimpleValueType();
  SDLoc DL(Op);
  unsigned Reg = 0;
  unsigned size = 0;
  // CMPXCHG implicitly uses the accumulator register of the operand width.
  switch(T.SimpleTy) {
  default: llvm_unreachable("Invalid value type!");
  case MVT::i8: Reg = X86::AL; size = 1; break;
  case MVT::i16: Reg = X86::AX; size = 2; break;
  case MVT::i32: Reg = X86::EAX; size = 4; break;
  case MVT::i64:
    assert(Subtarget.is64Bit() && "Node not type legal!");
    Reg = X86::RAX; size = 8;
    break;
  }
  // Move the comparison value (operand 2) into the accumulator, producing
  // glue so the copy stays adjacent to the cmpxchg.
  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
                                  Op.getOperand(2), SDValue());
  SDValue Ops[] = { cpIn.getValue(0),   // chain
                    Op.getOperand(1),   // pointer
                    Op.getOperand(3),   // swap value
                    DAG.getTargetConstant(size, DL, MVT::i8),
                    cpIn.getValue(1) }; // glue from the CopyToReg
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
                                           Ops, T, MMO);

  // Read the old value back out of the accumulator, then read EFLAGS; the
  // glue chain keeps the flags live from the cmpxchg to the setcc.
  SDValue cpOut =
    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
  SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
                                      MVT::i32, cpOut.getValue(2));
  // Success == ZF set (the memory value equalled the expected value).
  SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);

  return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
                     cpOut, Success, EFLAGS.getValue(1));
}
29935 | |
29936 | |
// Create a MOVMSK of a vXi8 vector, splitting types that are wider than the
// widest natively supported MOVMSK and recombining the partial masks with
// shift+or.
static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG,
                           const X86Subtarget &Subtarget) {
  MVT InVT = V.getSimpleValueType();

  // v64i8: split in half, recurse, and pack the two 32-bit masks into i64.
  if (InVT == MVT::v64i8) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
    Lo = getPMOVMSKB(DL, Lo, DAG, Subtarget);
    Hi = getPMOVMSKB(DL, Hi, DAG, Subtarget);
    Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
    Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
    Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
                     DAG.getConstant(32, DL, MVT::i8));
    return DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
  }
  // v32i8 without AVX2: split into two 128-bit MOVMSKs and merge into i32.
  if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
    Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
    Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
    Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
                     DAG.getConstant(16, DL, MVT::i8));
    return DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi);
  }

  // The type is natively supported - emit a single MOVMSK.
  return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
29964 | |
// Custom-lower the ISD::BITCAST cases the generic legalizer handles poorly:
// i64 <-> v64i1 in 32-bit mode, vXi1 masks to scalar integers (via MOVMSK),
// and small-vector/i64 casts to f64 or x86mmx through the SSE unit.
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT DstVT = Op.getSimpleValueType();

  // i64 -> v64i1 in 32-bit mode: split the i64 into two i32 halves,
  // bitcast each to v32i1, and concatenate.
  if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) {
    assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
    assert(Subtarget.hasBWI() && "Expected BWI target");
    SDLoc dl(Op);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
                             DAG.getIntPtrConstant(0, dl));
    Lo = DAG.getBitcast(MVT::v32i1, Lo);
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
                             DAG.getIntPtrConstant(1, dl));
    Hi = DAG.getBitcast(MVT::v32i1, Hi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
  }

  // vXi1 -> scalar integer: sign-extend the mask to bytes and use MOVMSK to
  // collect the sign bits, avoiding per-element scalarization.
  if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) {
    assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512");
    MVT SExtVT = SrcVT == MVT::v16i1 ? MVT::v16i8 : MVT::v32i8;
    SDLoc DL(Op);
    SDValue V = DAG.getSExtOrTrunc(Src, DL, SExtVT);
    V = getPMOVMSKB(DL, V, DAG, Subtarget);
    return DAG.getZExtOrTrunc(V, DL, DstVT);
  }

  assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
          SrcVT == MVT::i64) && "Unexpected VT!");

  assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
  // Only i64->f64 and 64-bit-vector->x86mmx are handled below; everything
  // else must be expanded by the generic legalizer.
  if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
      !(DstVT == MVT::x86mmx && SrcVT.isVector()))
    return SDValue();

  SDLoc dl(Op);
  if (SrcVT.isVector()) {
    // Widen the 64-bit input vector to a 128-bit one (e.g. v2i32 -> v4i32)
    // with an undef upper half so we can operate in the SSE registers.
    MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
                                 SrcVT.getVectorNumElements() * 2);
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
                      DAG.getUNDEF(SrcVT));
  } else {
    // 32-bit mode: move the scalar i64 into the low lane of a v2i64.
    assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
           "Unexpected source type in LowerBITCAST");
    Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
  }

  // Reinterpret as v2f64/v2i64 depending on the requested destination.
  MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
  Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);

  if (DstVT == MVT::x86mmx)
    return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);

  // f64 result: extract the low element.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
                     DAG.getIntPtrConstant(0, dl));
}
30028 | |
30029 | |
30030 | |
30031 | |
30032 | |
30033 | |
30034 | |
// Sum the per-byte values of V horizontally into the wider elements of VT
// (i16/i32/i64). Used to widen a vXi8 CTPOP result to the requested element
// type, primarily via the PSADBW (sum of absolute differences vs zero)
// instruction.
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  SDLoc DL(V);
  MVT ByteVecVT = V.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
         "Expected value to have byte element type.");
  assert(EltVT != MVT::i8 &&
         "Horizontal byte sum only makes sense for wider elements!");
  unsigned VecSize = VT.getSizeInBits();
  assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");

  // PSADBW against zero horizontally adds each group of 8 bytes and leaves
  // the result as an i64 lane, so it directly produces the i64 byte sums.
  if (EltVT == MVT::i64) {
    SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
    V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
    return DAG.getBitcast(VT, V);
  }

  if (EltVT == MVT::i32) {
    // Interleave the i32 lanes with zeros (low and high halves separately),
    // so that each i64 lane of the unpacked vectors holds the bytes of
    // exactly one original i32 element. PSADBW on each half then yields the
    // per-i32 byte sums in i64 lanes, and PACKUS narrows and recombines
    // them back into a v4i32-style result.
    SDValue Zeros = DAG.getConstant(0, DL, VT);
    SDValue V32 = DAG.getBitcast(VT, V);
    SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
    SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);

    // Horizontal byte sums of each half via PSADBW.
    Zeros = DAG.getConstant(0, DL, ByteVecVT);
    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
    Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                      DAG.getBitcast(ByteVecVT, Low), Zeros);
    High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                       DAG.getBitcast(ByteVecVT, High), Zeros);

    // Narrow and merge the two halves back together.
    MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
    V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
                    DAG.getBitcast(ShortVecVT, Low),
                    DAG.getBitcast(ShortVecVT, High));

    return DAG.getBitcast(VT, V);
  }

  // The only element type left is i16.
  assert(EltVT == MVT::i16 && "Unknown how to handle type");

  // For i16 elements: shift each i16 left by 8 so the high byte of each lane
  // moves down, add as bytes (no carries can cross byte boundaries here),
  // then shift right by 8 to leave the per-i16 sum in the low byte. The
  // shifts are done as i16 because there is no vXi8 shift instruction.
  SDValue ShifterV = DAG.getConstant(8, DL, VT);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
  V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
                  DAG.getBitcast(ByteVecVT, V));
  return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
}
30098 | |
// Lower vXi8 CTPOP with an in-register lookup table: split each byte into
// its low and high nibble, use PSHUFB with a 16-entry popcount table to
// look up each nibble's count, and add the two results.
static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  int NumElts = VT.getVectorNumElements();
  (void)EltVT;
  assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported.");

  // Popcount of every 4-bit value 0..15, replicated per 16-byte lane so it
  // can serve as the PSHUFB table operand.
  const int LUT[16] = { 0, 1, 1, 2,
                        1, 2, 2, 3,
                        1, 2, 2, 3,
                        2, 3, 3, 4};

  SmallVector<SDValue, 64> LUTVec;
  for (int i = 0; i < NumElts; ++i)
    LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
  SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);
  SDValue M0F = DAG.getConstant(0x0F, DL, VT);

  // High nibbles of each byte.
  SDValue FourV = DAG.getConstant(4, DL, VT);
  SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);

  // Low nibbles of each byte (high bits masked off).
  SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);

  // Use the nibbles as PSHUFB indices into the table, then add the nibble
  // counts to obtain the popcount of the whole byte.
  SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);
  SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);
  return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt);
}
30143 | |
30144 | |
30145 | |
// Dispatch vector CTPOP lowering: use VPOPCNTDQ when profitable, split
// oversized vectors, reduce non-i8 element types to a vXi8 popcount plus a
// horizontal byte sum, and finally use the in-register LUT for vXi8.
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
         "Unknown CTPOP type to handle");
  SDLoc DL(Op.getNode());
  SDValue Op0 = Op.getOperand(0);

  // TRUNC(CTPOP(ZEXT(X))) to make use of the vXi32 VPOPCNT instruction when
  // the widened type still fits the subtarget's vectors.
  if (Subtarget.hasVPOPCNTDQ()) {
    unsigned NumElems = VT.getVectorNumElements();
    assert((VT.getVectorElementType() == MVT::i8 ||
            VT.getVectorElementType() == MVT::i16) && "Unexpected type");
    if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) {
      MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
      Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0);
      Op = DAG.getNode(ISD::CTPOP, DL, NewVT, Op);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }
  }

  // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitVectorIntUnary(Op, DAG);

  // Decompose 512-bit ops into smaller 256-bit ops without BWI.
  if (VT.is512BitVector() && !Subtarget.hasBWI())
    return splitVectorIntUnary(Op, DAG);

  // For wider element types, compute the popcount of the underlying bytes
  // and then horizontally sum the bytes within each element.
  if (VT.getScalarType() != MVT::i8) {
    MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
    SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
    SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
    return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);
  }

  // The LUT approach needs PSHUFB (SSSE3); otherwise defer to LegalizeDAG.
  if (!Subtarget.hasSSSE3())
    return SDValue();

  return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
30189 | |
30190 | static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget, |
30191 | SelectionDAG &DAG) { |
30192 | assert(Op.getSimpleValueType().isVector() && |
30193 | "We only do custom lowering for vector population count."); |
30194 | return LowerVectorCTPOP(Op, Subtarget, DAG); |
30195 | } |
30196 | |
// Lower ISD::BITREVERSE on XOP targets using the VPPERM instruction, which
// can reverse the bits of each selected source byte in a single shuffle.
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  SDLoc DL(Op);

  // For scalars, round-trip through a 128-bit vector so the SIMD unit can
  // still perform the BITREVERSE.
  if (!VT.isVector()) {
    MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
    SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
    Res = DAG.getNode(ISD::BITREVERSE, DL, VecVT, Res);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res,
                       DAG.getIntPtrConstant(0, DL));
  }

  int NumElts = VT.getVectorNumElements();
  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;

  // Decompose 256-bit ops into smaller 128-bit ops.
  if (VT.is256BitVector())
    return splitVectorIntUnary(Op, DAG);

  assert(VT.is128BitVector() &&
         "Only 128-bit vector bitreverse lowering supported.");

  // Build the VPPERM control mask. Each control byte selects a source byte
  // (bytes 16-31 select from the second source, enabling memory folding)
  // with opcode (2 << 5) ORed in, which bit-reverses the selected byte.
  // Walking element bytes high-to-low also byte-swaps within each element.
  SmallVector<SDValue, 16> MaskElts;
  for (int i = 0; i != NumElts; ++i) {
    for (int j = ScalarSizeInBytes - 1; j >= 0; --j) {
      int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
      int PermuteByte = SourceByte | (2 << 5);
      MaskElts.push_back(DAG.getConstant(PermuteByte, DL, MVT::i8));
    }
  }

  SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts);
  SDValue Res = DAG.getBitcast(MVT::v16i8, In);
  Res = DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, DAG.getUNDEF(MVT::v16i8),
                    Res, Mask);
  return DAG.getBitcast(VT, Res);
}
30241 | |
// Lower ISD::BITREVERSE for byte vectors: prefer XOP's VPPERM, then GFNI's
// GF2P8AFFINEQB, and otherwise use per-nibble PSHUFB lookup tables.
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
                               SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  if (Subtarget.hasXOP() && !VT.is512BitVector())
    return LowerBITREVERSE_XOP(Op, DAG);

  assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");

  SDValue In = Op.getOperand(0);
  SDLoc DL(Op);

  assert(VT.getScalarType() == MVT::i8 &&
         "Only byte vector BITREVERSE supported");

  // Split 512-bit ops without BWI.
  if (VT == MVT::v64i8 && !Subtarget.hasBWI())
    return splitVectorIntUnary(Op, DAG);

  // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
  if (VT == MVT::v32i8 && !Subtarget.hasInt256())
    return splitVectorIntUnary(Op, DAG);

  unsigned NumElts = VT.getVectorNumElements();

  // If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits: the
  // anti-diagonal bit matrix 0x8040201008040201 permutes the 8 bits of
  // each byte into reverse order.
  if (Subtarget.hasGFNI()) {
    MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
    SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
    Matrix = DAG.getBitcast(VT, Matrix);
    return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix,
                       DAG.getTargetConstant(0, DL, MVT::i8));
  }

  // Perform BITREVERSE using PSHUFB lookups: split each byte into its two
  // nibbles and look up the bit-reversed value of each nibble, with the
  // tables arranged so the result lands in the opposite nibble.
  SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);
  SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);
  SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));

  // Bit-reversal of 0..15 placed in the high nibble (for the low nibble)...
  const int LoLUT[16] = {
      0x00, 0x80, 0x40, 0xC0,
      0x20, 0xA0, 0x60, 0xE0,
      0x10, 0x90, 0x50, 0xD0,
      0x30, 0xB0, 0x70, 0xF0};
  // ...and in the low nibble (for the high nibble).
  const int HiLUT[16] = {
      0x00, 0x08, 0x04, 0x0C,
      0x02, 0x0A, 0x06, 0x0E,
      0x01, 0x09, 0x05, 0x0D,
      0x03, 0x0B, 0x07, 0x0F};

  SmallVector<SDValue, 16> LoMaskElts, HiMaskElts;
  for (unsigned i = 0; i < NumElts; ++i) {
    LoMaskElts.push_back(DAG.getConstant(LoLUT[i % 16], DL, MVT::i8));
    HiMaskElts.push_back(DAG.getConstant(HiLUT[i % 16], DL, MVT::i8));
  }

  SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);
  SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);
  Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);
  Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);
  // OR the two reversed nibbles back into a full reversed byte.
  return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
30306 | |
// Lower ISD::PARITY using the x86 parity flag: XOR-fold the value down to a
// single byte (PF only reflects the low 8 bits of a result) and read the
// inverted parity flag with SETNP.
static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue X = Op.getOperand(0);
  MVT VT = Op.getSimpleValueType();

  // Special case: if the value fits in 8 bits (either i8 or provably zero
  // above bit 7), a single 8-bit compare against zero sets PF directly.
  if (VT == MVT::i8 ||
      DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) {
    X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
    SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X,
                                DAG.getConstant(0, DL, MVT::i8));
    // SETNP: PF is set for EVEN parity, so NP gives the parity bit.
    SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
    // Zero-extend the i8 setcc result back to the original type.
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
  }

  // i64: fold the high and low 32-bit halves together with a 32-bit XOR.
  if (VT == MVT::i64) {
    SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                             DAG.getNode(ISD::SRL, DL, MVT::i64, X,
                                         DAG.getConstant(32, DL, MVT::i8)));
    SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
    X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
  }

  if (VT != MVT::i16) {
    // Fold the high and low 16 bits together with a 32-bit XOR.
    SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X,
                               DAG.getConstant(16, DL, MVT::i8));
    X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16);
  } else {
    // i16 input: widen so the i32 shift below is valid.
    X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X);
  }

  // Finally XOR the two low bytes with a flag-producing 8-bit XOR; using
  // byte operands lets the register allocator use an h-register for the
  // high byte and save a shift.
  SDValue Hi = DAG.getNode(
      ISD::TRUNCATE, DL, MVT::i8,
      DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8)));
  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
  SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
  SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);

  // Read the inverted parity flag with SETNP.
  SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);

  // Zero-extend the i8 setcc result back to the original type.
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
}
30358 | |
30359 | static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, |
30360 | const X86Subtarget &Subtarget) { |
30361 | unsigned NewOpc = 0; |
30362 | switch (N->getOpcode()) { |
30363 | case ISD::ATOMIC_LOAD_ADD: |
30364 | NewOpc = X86ISD::LADD; |
30365 | break; |
30366 | case ISD::ATOMIC_LOAD_SUB: |
30367 | NewOpc = X86ISD::LSUB; |
30368 | break; |
30369 | case ISD::ATOMIC_LOAD_OR: |
30370 | NewOpc = X86ISD::LOR; |
30371 | break; |
30372 | case ISD::ATOMIC_LOAD_XOR: |
30373 | NewOpc = X86ISD::LXOR; |
30374 | break; |
30375 | case ISD::ATOMIC_LOAD_AND: |
30376 | NewOpc = X86ISD::LAND; |
30377 | break; |
30378 | default: |
30379 | llvm_unreachable("Unknown ATOMIC_LOAD_ opcode"); |
30380 | } |
30381 | |
30382 | MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand(); |
30383 | |
30384 | return DAG.getMemIntrinsicNode( |
30385 | NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other), |
30386 | {N->getOperand(0), N->getOperand(1), N->getOperand(2)}, |
30387 | N->getSimpleValueType(0), MMO); |
30388 | } |
30389 | |
30390 | |
// Lower integer atomicrmw arithmetic nodes (add/sub/and/or/xor).
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
  SDValue Chain = N->getOperand(0);
  SDValue LHS = N->getOperand(1); // Pointer operand.
  SDValue RHS = N->getOperand(2); // Value operand.
  unsigned Opc = N->getOpcode();
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // If the loaded value is actually used, only add/sub can be handled here;
  // sub is rewritten as add of the negated operand so both share one form,
  // and add is returned unchanged for default isel handling.
  if (N->hasAnyUseOfValue(0)) {
    // atomicrmw sub p, v  ==>  atomicrmw add p, (0 - v)
    if (Opc == ISD::ATOMIC_LOAD_SUB) {
      RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
                           RHS, AN->getMemOperand());
    }
    assert(Opc == ISD::ATOMIC_LOAD_ADD &&
           "Used AtomicRMW ops other than Add should have been expanded!");
    return N;
  }

  // "atomicrmw or p, 0" with an unused result never changes memory, so it
  // only needs to provide the requested ordering, not a locked RMW.
  if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS)) {
    // For seq_cst at system scope, emit a locked operation on the stack to
    // supply the full ordering (see emitLockedStackOp).
    if (AN->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent &&
        AN->getSyncScopeID() == SyncScope::System) {
      SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
      assert(!N->hasAnyUseOfValue(0));
      // Return undef for the unused result so the node's value count is
      // preserved for its users of the chain.
      return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
                         DAG.getUNDEF(VT), NewChain);
    }
    // Weaker orderings (or narrower scopes) only need a MEMBARRIER node.
    SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain);
    assert(!N->hasAnyUseOfValue(0));
    // As above: keep the unused result slot filled with undef.
    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
                       DAG.getUNDEF(VT), NewChain);
  }

  // Result unused: lower to a LOCK-prefixed arithmetic op on memory.
  SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
  // The loaded value must be dead here; only the chain result survives.
  assert(!N->hasAnyUseOfValue(0));
  // Replace the dead value result with undef and thread through the chain.
  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
                     DAG.getUNDEF(VT), LockOp.getValue(1));
}
30454 | |
// Lower an atomic store.  Non-seq_cst stores of legal types are left alone;
// illegal i64 stores on 32-bit targets are done via SSE or x87 moves when
// floats are allowed; everything else falls back to an atomic swap.
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDLoc dl(Node);
  EVT VT = Node->getMemoryVT();

  bool IsSeqCst =
      Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent;
  bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT);

  // A plain store suffices for weaker-than-seq_cst orderings of legal types.
  if (!IsSeqCst && IsTypeLegal)
    return Op;

  // i64 store on a target where i64 is not legal (i.e. 32-bit x86).
  if (VT == MVT::i64 && !IsTypeLegal) {
    // FP registers may only be used if the function doesn't forbid implicit
    // floating-point code.
    bool NoImplicitFloatOps =
        DAG.getMachineFunction().getFunction().hasFnAttribute(
            Attribute::NoImplicitFloat);
    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
      SDValue Chain;
      if (Subtarget.hasSSE1()) {
        // Move the 64-bit value into a vector register and store its low
        // element in a single instruction.
        SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                       Node->getOperand(2));
        // Without SSE2 there is no v2i64; reuse the v4f32 register file.
        MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
        SclToVec = DAG.getBitcast(StVT, SclToVec);
        SDVTList Tys = DAG.getVTList(MVT::Other);
        SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
        Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
                                        MVT::i64, Node->getMemOperand());
      } else if (Subtarget.hasX87()) {
        // No SSE: bounce the value through the x87 stack.  Spill the i64 to
        // a stack slot first so FILD can load it.
        SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
        int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
        MachinePointerInfo MPI =
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
        Chain =
            DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
                         MPI, MaybeAlign(), MachineMemOperand::MOStore);
        SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
        SDValue LdOps[] = {Chain, StackPtr};
        // Load the i64 into an f80 x87 register...
        SDValue Value =
            DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI,
                                    None, MachineMemOperand::MOLoad);
        Chain = Value.getValue(1);

        // ...and store it to the real destination as a 64-bit integer.
        SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
        Chain =
            DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
                                    StoreOps, MVT::i64, Node->getMemOperand());
      }

      if (Chain) {
        // The FP-based store is not itself a fence; seq_cst additionally
        // needs the full ordering supplied by a locked stack operation.
        if (IsSeqCst)
          Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);

        return Chain;
      }
    }
  }

  // Fallback: use an atomic swap and discard the loaded value, keeping only
  // the chain result.
  SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
                               Node->getMemoryVT(),
                               Node->getOperand(0),
                               Node->getOperand(1), Node->getOperand(2),
                               Node->getMemOperand());
  return Swap.getValue(1);
}
30532 | |
30533 | static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { |
30534 | SDNode *N = Op.getNode(); |
30535 | MVT VT = N->getSimpleValueType(0); |
30536 | unsigned Opc = Op.getOpcode(); |
30537 | |
30538 | |
30539 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
30540 | return SDValue(); |
30541 | |
30542 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
30543 | SDLoc DL(N); |
30544 | |
30545 | |
30546 | SDValue Carry = Op.getOperand(2); |
30547 | EVT CarryVT = Carry.getValueType(); |
30548 | Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), |
30549 | Carry, DAG.getAllOnesConstant(DL, CarryVT)); |
30550 | |
30551 | bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY; |
30552 | SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs, |
30553 | Op.getOperand(0), Op.getOperand(1), |
30554 | Carry.getValue(1)); |
30555 | |
30556 | bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY; |
30557 | SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B, |
30558 | Sum.getValue(1), DL, DAG); |
30559 | if (N->getValueType(1) == MVT::i1) |
30560 | SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); |
30561 | |
30562 | return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); |
30563 | } |
30564 | |
30565 | static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, |
30566 | SelectionDAG &DAG) { |
30567 | assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit()); |
30568 | |
30569 | |
30570 | |
30571 | |
30572 | SDLoc dl(Op); |
30573 | SDValue Arg = Op.getOperand(0); |
30574 | EVT ArgVT = Arg.getValueType(); |
30575 | Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); |
30576 | |
30577 | TargetLowering::ArgListTy Args; |
30578 | TargetLowering::ArgListEntry Entry; |
30579 | |
30580 | Entry.Node = Arg; |
30581 | Entry.Ty = ArgTy; |
30582 | Entry.IsSExt = false; |
30583 | Entry.IsZExt = false; |
30584 | Args.push_back(Entry); |
30585 | |
30586 | bool isF64 = ArgVT == MVT::f64; |
30587 | |
30588 | |
30589 | |
30590 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
30591 | RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; |
30592 | const char *LibcallName = TLI.getLibcallName(LC); |
30593 | SDValue Callee = |
30594 | DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); |
30595 | |
30596 | Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy) |
30597 | : (Type *)FixedVectorType::get(ArgTy, 4); |
30598 | |
30599 | TargetLowering::CallLoweringInfo CLI(DAG); |
30600 | CLI.setDebugLoc(dl) |
30601 | .setChain(DAG.getEntryNode()) |
30602 | .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)); |
30603 | |
30604 | std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); |
30605 | |
30606 | if (isF64) |
30607 | |
30608 | return CallResult.first; |
30609 | |
30610 | |
30611 | SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, |
30612 | CallResult.first, DAG.getIntPtrConstant(0, dl)); |
30613 | SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, |
30614 | CallResult.first, DAG.getIntPtrConstant(1, dl)); |
30615 | SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); |
30616 | return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); |
30617 | } |
30618 | |
30619 | |
30620 | |
30621 | static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, |
30622 | bool FillWithZeroes = false) { |
30623 | |
30624 | MVT InVT = InOp.getSimpleValueType(); |
30625 | if (InVT == NVT) |
30626 | return InOp; |
30627 | |
30628 | if (InOp.isUndef()) |
30629 | return DAG.getUNDEF(NVT); |
30630 | |
30631 | assert(InVT.getVectorElementType() == NVT.getVectorElementType() && |
30632 | "input and widen element type must match"); |
30633 | |
30634 | unsigned InNumElts = InVT.getVectorNumElements(); |
30635 | unsigned WidenNumElts = NVT.getVectorNumElements(); |
30636 | assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && |
30637 | "Unexpected request for vector widening"); |
30638 | |
30639 | SDLoc dl(InOp); |
30640 | if (InOp.getOpcode() == ISD::CONCAT_VECTORS && |
30641 | InOp.getNumOperands() == 2) { |
30642 | SDValue N1 = InOp.getOperand(1); |
30643 | if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || |
30644 | N1.isUndef()) { |
30645 | InOp = InOp.getOperand(0); |
30646 | InVT = InOp.getSimpleValueType(); |
30647 | InNumElts = InVT.getVectorNumElements(); |
30648 | } |
30649 | } |
30650 | if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || |
30651 | ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { |
30652 | SmallVector<SDValue, 16> Ops; |
30653 | for (unsigned i = 0; i < InNumElts; ++i) |
30654 | Ops.push_back(InOp.getOperand(i)); |
30655 | |
30656 | EVT EltVT = InOp.getOperand(0).getValueType(); |
30657 | |
30658 | SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : |
30659 | DAG.getUNDEF(EltVT); |
30660 | for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) |
30661 | Ops.push_back(FillVal); |
30662 | return DAG.getBuildVector(NVT, dl, Ops); |
30663 | } |
30664 | SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : |
30665 | DAG.getUNDEF(NVT); |
30666 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, |
30667 | InOp, DAG.getIntPtrConstant(0, dl)); |
30668 | } |
30669 | |
// Lower a masked scatter to X86ISD::MSCATTER, widening narrow data/index
// vectors to a size the AVX-512 instruction set supports.
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
  assert(Subtarget.hasAVX512() &&
         "MGATHER/MSCATTER are supported on AVX-512 arch only");

  MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
  SDValue Src = N->getValue();
  MVT VT = Src.getSimpleValueType();
  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
  SDLoc dl(Op);

  SDValue Scale = N->getScale();
  SDValue Index = N->getIndex();
  SDValue Mask = N->getMask();
  SDValue Chain = N->getChain();
  SDValue BasePtr = N->getBasePtr();

  // Two-element data vectors: only handled directly with VLX and a v2i64
  // index, by widening the data to its transformed (wider) type.
  if (VT == MVT::v2f32 || VT == MVT::v2i32) {
    assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
    if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
      // Pad the upper half of the data with undef lanes.
      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));
      SDVTList VTs = DAG.getVTList(MVT::Other);
      SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
      return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
                                     N->getMemoryVT(), N->getMemOperand());
    }
    // Otherwise bail out and let the default expansion handle it.
    return SDValue();
  }

  MVT IndexVT = Index.getSimpleValueType();

  // v2i32 indices are not handled here either.
  if (IndexVT == MVT::v2i32)
    return SDValue();

  // Without VLX only 512-bit scatters exist: widen data, index and mask up
  // to 512 bits (limited by whichever of data/index hits 512 bits first).
  if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
      !Index.getSimpleValueType().is512BitVector()) {
    // Factor by which both data and index can grow without exceeding 512.
    unsigned Factor = std::min(512/VT.getSizeInBits(),
                               512/IndexVT.getSizeInBits());
    unsigned NumElts = VT.getVectorNumElements() * Factor;

    VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);

    Src = ExtendToType(Src, VT, DAG);
    Index = ExtendToType(Index, IndexVT, DAG);
    // Zero-fill the mask so the padded lanes never store.
    Mask = ExtendToType(Mask, MaskVT, DAG, true);
  }

  SDVTList VTs = DAG.getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
  return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
                                 N->getMemoryVT(), N->getMemOperand());
}
30732 | |
// Lower a masked load.  Vector-bool (non-i1) masks are handled by loading
// with a zero passthru and selecting afterwards; i1 masks on AVX-512
// without VLX are widened to 512 bits.
static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {

  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
  MVT VT = Op.getSimpleValueType();
  MVT ScalarVT = VT.getScalarType();
  SDValue Mask = N->getMask();
  MVT MaskVT = Mask.getSimpleValueType();
  SDValue PassThru = N->getPassThru();
  SDLoc dl(Op);

  // Non-i1 mask elements: the hardware form only supports a zero passthru.
  if (MaskVT.getVectorElementType() != MVT::i1) {
    // Undef/zero passthru needs no fixup.
    if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
      return Op;

    // Load with a zero passthru, then blend in the real passthru for the
    // masked-off lanes with a VSELECT.
    SDValue NewLoad = DAG.getMaskedLoad(
        VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
        getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(),
        N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(),
        N->isExpandingLoad());

    SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
    return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
  }

  assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
         "Expanding masked load is supported on AVX-512 target only!");

  assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
         "Expanding masked load is supported for 32 and 64-bit types only!");

  assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
         "Cannot lower masked load op.");

  assert((ScalarVT.getSizeInBits() >= 32 ||
          (Subtarget.hasBWI() &&
           (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
         "Unsupported masked load op.");

  // Widen the data (and passthru) to 512 bits, where the instruction is
  // available without VLX.
  unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
  MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
  PassThru = ExtendToType(PassThru, WideDataVT, DAG);

  // Mask element has to be i1.
  assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
         "Unexpected mask type");

  MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);

  // Zero-fill the widened mask so the padded lanes stay inactive.
  Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
  SDValue NewLoad = DAG.getMaskedLoad(
      WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
      PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
      N->getExtensionType(), N->isExpandingLoad());

  // Extract the originally-requested narrow result back out.
  SDValue Extract =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0),
                  DAG.getIntPtrConstant(0, dl));
  SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
  return DAG.getMergeValues(RetOps, dl);
}
30798 | |
30799 | static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, |
30800 | SelectionDAG &DAG) { |
30801 | MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode()); |
30802 | SDValue DataToStore = N->getValue(); |
30803 | MVT VT = DataToStore.getSimpleValueType(); |
30804 | MVT ScalarVT = VT.getScalarType(); |
30805 | SDValue Mask = N->getMask(); |
30806 | SDLoc dl(Op); |
30807 | |
30808 | assert((!N->isCompressingStore() || Subtarget.hasAVX512()) && |
30809 | "Expanding masked load is supported on AVX-512 target only!"); |
30810 | |
30811 | assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && |
30812 | "Expanding masked load is supported for 32 and 64-bit types only!"); |
30813 | |
30814 | assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && |
30815 | "Cannot lower masked store op."); |
30816 | |
30817 | assert((ScalarVT.getSizeInBits() >= 32 || |
30818 | (Subtarget.hasBWI() && |
30819 | (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && |
30820 | "Unsupported masked store op."); |
30821 | |
30822 | |
30823 | |
30824 | unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); |
30825 | MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); |
30826 | |
30827 | |
30828 | assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && |
30829 | "Unexpected mask type"); |
30830 | |
30831 | MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); |
30832 | |
30833 | DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); |
30834 | Mask = ExtendToType(Mask, WideMaskVT, DAG, true); |
30835 | return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), |
30836 | N->getOffset(), Mask, N->getMemoryVT(), |
30837 | N->getMemOperand(), N->getAddressingMode(), |
30838 | N->isTruncatingStore(), N->isCompressingStore()); |
30839 | } |
30840 | |
// Lower a masked gather to X86ISD::MGATHER, widening narrow data/index
// vectors to 512 bits on AVX-512 targets without VLX.
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  assert(Subtarget.hasAVX2() &&
         "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");

  MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Index = N->getIndex();
  SDValue Mask = N->getMask();
  SDValue PassThru = N->getPassThru();
  MVT IndexVT = Index.getSimpleValueType();

  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");

  // v2i32 indices are not handled here; bail out to default expansion.
  if (IndexVT == MVT::v2i32)
    return SDValue();

  // Remember the requested type; the widened gather result is narrowed
  // back to it at the end.
  MVT OrigVT = VT;
  if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
      !IndexVT.is512BitVector()) {
    // Factor by which both data and index can grow without exceeding 512.
    unsigned Factor = std::min(512/VT.getSizeInBits(),
                               512/IndexVT.getSizeInBits());

    unsigned NumElts = VT.getVectorNumElements() * Factor;

    VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);

    PassThru = ExtendToType(PassThru, VT, DAG);
    Index = ExtendToType(Index, IndexVT, DAG);
    // Zero-fill the mask so the padded lanes never load.
    Mask = ExtendToType(Mask, MaskVT, DAG, true);
  }

  SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
                    N->getScale() };
  SDValue NewGather = DAG.getMemIntrinsicNode(
      X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(),
      N->getMemOperand());
  SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT,
                                NewGather, DAG.getIntPtrConstant(0, dl));
  return DAG.getMergeValues({Extract, NewGather.getValue(1)}, dl);
}
30889 | |
30890 | static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) { |
30891 | SDLoc dl(Op); |
30892 | SDValue Src = Op.getOperand(0); |
30893 | MVT DstVT = Op.getSimpleValueType(); |
30894 | |
30895 | AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode()); |
30896 | unsigned SrcAS = N->getSrcAddressSpace(); |
30897 | |
30898 | assert(SrcAS != N->getDestAddressSpace() && |
30899 | "addrspacecast must be between different address spaces"); |
30900 | |
30901 | if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64) { |
30902 | Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src); |
30903 | } else if (DstVT == MVT::i64) { |
30904 | Op = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src); |
30905 | } else if (DstVT == MVT::i32) { |
30906 | Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src); |
30907 | } else { |
30908 | report_fatal_error("Bad address space in addrspacecast"); |
30909 | } |
30910 | return Op; |
30911 | } |
30912 | |
30913 | SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op, |
30914 | SelectionDAG &DAG) const { |
30915 | |
30916 | |
30917 | |
30918 | |
30919 | |
30920 | |
30921 | SmallVector<SDValue, 2> Ops; |
30922 | |
30923 | Ops.push_back(Op.getOperand(0)); |
30924 | if (Op->getGluedNode()) |
30925 | Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); |
30926 | |
30927 | SDLoc OpDL(Op); |
30928 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
30929 | SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0); |
30930 | |
30931 | return NOOP; |
30932 | } |
30933 | |
30934 | |
30935 | static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) { |
30936 | SDLoc dl(Op); |
30937 | EVT VT = Op.getValueType(); |
30938 | SDValue Lo, Hi; |
30939 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); |
30940 | EVT LoVT, HiVT; |
30941 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
30942 | SDValue RC = Op.getOperand(1); |
30943 | Lo = DAG.getNode(X86ISD::CVTPS2PH, dl, LoVT, Lo, RC); |
30944 | Hi = DAG.getNode(X86ISD::CVTPS2PH, dl, HiVT, Hi, RC); |
30945 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
30946 | } |
30947 | |
30948 | |
/// Dispatch an operation marked Custom in the x86 lowering tables to its
/// dedicated lowering routine.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Should not custom lower this!");
  // Atomics.
  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return LowerCMP_SWAP(Op, Subtarget, DAG);
  case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
  case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget);
  case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
  case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG);
  // Vector construction and shuffling.
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG);
  case ISD::VSELECT: return LowerVSELECT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
  // Addresses and symbols.
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  // Shifts and funnel shifts.
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
  case ISD::FSHL:
  case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
  // Int <-> FP conversions (incl. strict variants).
  case ISD::STRICT_SINT_TO_FP:
  case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
  case ISD::STRICT_UINT_TO_FP:
  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
  case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
  case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
  case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
  case ISD::FP_TO_SINT:
  case ISD::STRICT_FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::FP_EXTEND:
  case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::FP_ROUND:
  case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG);
  case ISD::FP16_TO_FP:
  case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_FP16:
  case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
  // Memory operations.
  case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
  case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
  // Floating-point arithmetic and sign manipulation.
  case ISD::FADD:
  case ISD::FSUB: return lowerFaddFsub(Op, DAG);
  case ISD::FROUND: return LowerFROUND(Op, DAG);
  case ISD::FABS:
  case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
  case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
  case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
  case ISD::LRINT:
  case ISD::LLRINT: return LowerLRINT_LLRINT(Op, DAG);
  // Comparisons, selects and branches.
  case ISD::SETCC:
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG);
  case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
  // Varargs, intrinsics and frame/EH bookkeeping.
  case ISD::VASTART: return LowerVASTART(Op, DAG);
  case ISD::VAARG: return LowerVAARG(Op, DAG);
  case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
  case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
  case ISD::FRAME_TO_ARGS_OFFSET:
    return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_SETUP_DISPATCH:
    return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
  // Integer bit counting, multiplication, rotates, shifts.
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG);
  case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
  case ISD::MULHS:
  case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
  case ISD::ROTL:
  case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
  // Overflow-reporting arithmetic.
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO: return LowerXALUO(Op, DAG);
  case ISD::SMULO:
  case ISD::UMULO: return LowerMULO(Op, Subtarget, DAG);
  case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
  case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
  case ISD::SADDO_CARRY:
  case ISD::SSUBO_CARRY:
  case ISD::ADDCARRY:
  case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
  case ISD::ADD:
  case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
  // Saturating arithmetic and min/max.
  case ISD::UADDSAT:
  case ISD::SADDSAT:
  case ISD::USUBSAT:
  case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG, Subtarget);
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::UMAX:
  case ISD::UMIN: return LowerMINMAX(Op, DAG);
  case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
  case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
  // Masked memory operations.
  case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
  case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
  case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
  case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
  case ISD::GC_TRANSITION_START:
  case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG);
  case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
  case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
  }
}
31093 | |
31094 | |
31095 | |
31096 | void X86TargetLowering::ReplaceNodeResults(SDNode *N, |
31097 | SmallVectorImpl<SDValue>&Results, |
31098 | SelectionDAG &DAG) const { |
31099 | SDLoc dl(N); |
31100 | switch (N->getOpcode()) { |
31101 | default: |
31102 | #ifndef NDEBUG |
31103 | dbgs() << "ReplaceNodeResults: "; |
31104 | N->dump(&DAG); |
31105 | #endif |
31106 | llvm_unreachable("Do not know how to custom type legalize this operation!"); |
31107 | case X86ISD::CVTPH2PS: { |
31108 | EVT VT = N->getValueType(0); |
31109 | SDValue Lo, Hi; |
31110 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); |
31111 | EVT LoVT, HiVT; |
31112 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
31113 | Lo = DAG.getNode(X86ISD::CVTPH2PS, dl, LoVT, Lo); |
31114 | Hi = DAG.getNode(X86ISD::CVTPH2PS, dl, HiVT, Hi); |
31115 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
31116 | Results.push_back(Res); |
31117 | return; |
31118 | } |
31119 | case X86ISD::STRICT_CVTPH2PS: { |
31120 | EVT VT = N->getValueType(0); |
31121 | SDValue Lo, Hi; |
31122 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 1); |
31123 | EVT LoVT, HiVT; |
31124 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
31125 | Lo = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {LoVT, MVT::Other}, |
31126 | {N->getOperand(0), Lo}); |
31127 | Hi = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {HiVT, MVT::Other}, |
31128 | {N->getOperand(0), Hi}); |
31129 | SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
31130 | Lo.getValue(1), Hi.getValue(1)); |
31131 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
31132 | Results.push_back(Res); |
31133 | Results.push_back(Chain); |
31134 | return; |
31135 | } |
31136 | case X86ISD::CVTPS2PH: |
31137 | Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG)); |
31138 | return; |
31139 | case ISD::CTPOP: { |
31140 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
31141 | |
31142 | bool NoImplicitFloatOps = |
31143 | DAG.getMachineFunction().getFunction().hasFnAttribute( |
31144 | Attribute::NoImplicitFloat); |
31145 | if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) { |
31146 | SDValue Wide = |
31147 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0)); |
31148 | Wide = DAG.getNode(ISD::CTPOP, dl, MVT::v2i64, Wide); |
31149 | |
31150 | |
31151 | Wide = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Wide); |
31152 | Wide = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Wide, |
31153 | DAG.getIntPtrConstant(0, dl)); |
31154 | Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Wide); |
31155 | Results.push_back(Wide); |
31156 | } |
31157 | return; |
31158 | } |
31159 | case ISD::MUL: { |
31160 | EVT VT = N->getValueType(0); |
31161 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
31162 | VT.getVectorElementType() == MVT::i8 && "Unexpected VT!"); |
31163 | |
31164 | |
31165 | MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); |
31166 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); |
31167 | SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); |
31168 | SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1); |
31169 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
31170 | unsigned NumConcats = 16 / VT.getVectorNumElements(); |
31171 | SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); |
31172 | ConcatOps[0] = Res; |
31173 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps); |
31174 | Results.push_back(Res); |
31175 | return; |
31176 | } |
31177 | case X86ISD::VPMADDWD: |
31178 | case X86ISD::AVG: { |
31179 | |
31180 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
31181 | |
31182 | EVT VT = N->getValueType(0); |
31183 | EVT InVT = N->getOperand(0).getValueType(); |
31184 | assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 && |
31185 | "Expected a VT that divides into 128 bits."); |
31186 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
31187 | "Unexpected type action!"); |
31188 | unsigned NumConcat = 128 / InVT.getSizeInBits(); |
31189 | |
31190 | EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), |
31191 | InVT.getVectorElementType(), |
31192 | NumConcat * InVT.getVectorNumElements()); |
31193 | EVT WideVT = EVT::getVectorVT(*DAG.getContext(), |
31194 | VT.getVectorElementType(), |
31195 | NumConcat * VT.getVectorNumElements()); |
31196 | |
31197 | SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); |
31198 | Ops[0] = N->getOperand(0); |
31199 | SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); |
31200 | Ops[0] = N->getOperand(1); |
31201 | SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); |
31202 | |
31203 | SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1); |
31204 | Results.push_back(Res); |
31205 | return; |
31206 | } |
31207 | |
31208 | case X86ISD::FMINC: |
31209 | case X86ISD::FMIN: |
31210 | case X86ISD::FMAXC: |
31211 | case X86ISD::FMAX: { |
31212 | EVT VT = N->getValueType(0); |
31213 | assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX."); |
31214 | SDValue UNDEF = DAG.getUNDEF(VT); |
31215 | SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, |
31216 | N->getOperand(0), UNDEF); |
31217 | SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, |
31218 | N->getOperand(1), UNDEF); |
31219 | Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS)); |
31220 | return; |
31221 | } |
31222 | case ISD::SDIV: |
31223 | case ISD::UDIV: |
31224 | case ISD::SREM: |
31225 | case ISD::UREM: { |
31226 | EVT VT = N->getValueType(0); |
31227 | if (VT.isVector()) { |
31228 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
31229 | "Unexpected type action!"); |
31230 | |
31231 | |
31232 | |
31233 | APInt SplatVal; |
31234 | if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) { |
31235 | unsigned NumConcats = 128 / VT.getSizeInBits(); |
31236 | SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT)); |
31237 | Ops0[0] = N->getOperand(0); |
31238 | EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT); |
31239 | SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0); |
31240 | SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT); |
31241 | SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1); |
31242 | Results.push_back(Res); |
31243 | } |
31244 | return; |
31245 | } |
31246 | |
31247 | SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG); |
31248 | Results.push_back(V); |
31249 | return; |
31250 | } |
31251 | case ISD::TRUNCATE: { |
31252 | MVT VT = N->getSimpleValueType(0); |
31253 | if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) |
31254 | return; |
31255 | |
31256 | |
31257 | |
31258 | |
31259 | MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT(); |
31260 | SDValue In = N->getOperand(0); |
31261 | EVT InVT = In.getValueType(); |
31262 | |
31263 | unsigned InBits = InVT.getSizeInBits(); |
31264 | if (128 % InBits == 0) { |
31265 | |
31266 | |
31267 | |
31268 | MVT InEltVT = InVT.getSimpleVT().getVectorElementType(); |
31269 | EVT EltVT = VT.getVectorElementType(); |
31270 | unsigned WidenNumElts = WidenVT.getVectorNumElements(); |
31271 | SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); |
31272 | |
31273 | |
31274 | unsigned MinElts = VT.getVectorNumElements(); |
31275 | for (unsigned i=0; i < MinElts; ++i) { |
31276 | SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In, |
31277 | DAG.getIntPtrConstant(i, dl)); |
31278 | Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val); |
31279 | } |
31280 | Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops)); |
31281 | return; |
31282 | } |
31283 | |
31284 | |
31285 | |
31286 | if (Subtarget.hasAVX512() && isTypeLegal(InVT)) { |
31287 | |
31288 | if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) { |
31289 | Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In)); |
31290 | return; |
31291 | } |
31292 | |
31293 | if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) { |
31294 | In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In, |
31295 | DAG.getUNDEF(MVT::v4i64)); |
31296 | Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In)); |
31297 | return; |
31298 | } |
31299 | } |
31300 | if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 && |
31301 | getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector && |
31302 | isTypeLegal(MVT::v4i64)) { |
31303 | |
31304 | |
31305 | SDValue Lo, Hi; |
31306 | std::tie(Lo, Hi) = DAG.SplitVector(In, dl); |
31307 | |
31308 | Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo); |
31309 | Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi); |
31310 | SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi, |
31311 | { 0, 1, 2, 3, 16, 17, 18, 19, |
31312 | -1, -1, -1, -1, -1, -1, -1, -1 }); |
31313 | Results.push_back(Res); |
31314 | return; |
31315 | } |
31316 | |
31317 | return; |
31318 | } |
31319 | case ISD::ANY_EXTEND: |
31320 | |
31321 | |
31322 | assert(N->getValueType(0) == MVT::v8i8 && |
31323 | "Do not know how to legalize this Node"); |
31324 | return; |
31325 | case ISD::SIGN_EXTEND: |
31326 | case ISD::ZERO_EXTEND: { |
31327 | EVT VT = N->getValueType(0); |
31328 | SDValue In = N->getOperand(0); |
31329 | EVT InVT = In.getValueType(); |
31330 | if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && |
31331 | (InVT == MVT::v4i16 || InVT == MVT::v4i8)){ |
31332 | assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector && |
31333 | "Unexpected type action!"); |
31334 | assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode"); |
31335 | |
31336 | |
31337 | |
31338 | |
31339 | In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); |
31340 | |
31341 | |
31342 | SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32); |
31343 | SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT); |
31344 | |
31345 | |
31346 | |
31347 | SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, |
31348 | {0, 4, 1, 5}); |
31349 | Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo); |
31350 | SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, |
31351 | {2, 6, 3, 7}); |
31352 | Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi); |
31353 | |
31354 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
31355 | Results.push_back(Res); |
31356 | return; |
31357 | } |
31358 | |
31359 | if (VT == MVT::v16i32 || VT == MVT::v8i64) { |
31360 | if (!InVT.is128BitVector()) { |
31361 | |
31362 | |
31363 | if (getTypeAction(*DAG.getContext(), InVT) != TypePromoteInteger) |
31364 | return; |
31365 | InVT = getTypeToTransformTo(*DAG.getContext(), InVT); |
31366 | if (!InVT.is128BitVector()) |
31367 | return; |
31368 | |
31369 | |
31370 | |
31371 | In = DAG.getNode(N->getOpcode(), dl, InVT, In); |
31372 | } |
31373 | |
31374 | |
31375 | |
31376 | EVT LoVT, HiVT; |
31377 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); |
31378 | assert(isTypeLegal(LoVT) && "Split VT not legal?"); |
31379 | |
31380 | SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG); |
31381 | |
31382 | |
31383 | unsigned NumElts = InVT.getVectorNumElements(); |
31384 | unsigned HalfNumElts = NumElts / 2; |
31385 | SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef); |
31386 | for (unsigned i = 0; i != HalfNumElts; ++i) |
31387 | ShufMask[i] = i + HalfNumElts; |
31388 | |
31389 | SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask); |
31390 | Hi = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, HiVT, Hi, DAG); |
31391 | |
31392 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
31393 | Results.push_back(Res); |
31394 | } |
31395 | return; |
31396 | } |
31397 | case ISD::FP_TO_SINT: |
31398 | case ISD::STRICT_FP_TO_SINT: |
31399 | case ISD::FP_TO_UINT: |
31400 | case ISD::STRICT_FP_TO_UINT: { |
31401 | bool IsStrict = N->isStrictFPOpcode(); |
31402 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || |
31403 | N->getOpcode() == ISD::STRICT_FP_TO_SINT; |
31404 | EVT VT = N->getValueType(0); |
31405 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
31406 | EVT SrcVT = Src.getValueType(); |
31407 | |
31408 | if (VT.isVector() && Subtarget.hasFP16() && |
31409 | SrcVT.getVectorElementType() == MVT::f16) { |
31410 | EVT EleVT = VT.getVectorElementType(); |
31411 | EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16; |
31412 | |
31413 | if (SrcVT != MVT::v8f16) { |
31414 | SDValue Tmp = |
31415 | IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT); |
31416 | SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp); |
31417 | Ops[0] = Src; |
31418 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops); |
31419 | } |
31420 | |
31421 | SDValue Res, Chain; |
31422 | if (IsStrict) { |
31423 | unsigned Opc = |
31424 | IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
31425 | Res = |
31426 | DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src}); |
31427 | Chain = Res.getValue(1); |
31428 | } else { |
31429 | unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
31430 | Res = DAG.getNode(Opc, dl, ResVT, Src); |
31431 | } |
31432 | |
31433 | |
31434 | if (EleVT.getSizeInBits() < 16) { |
31435 | MVT TmpVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8); |
31436 | Res = DAG.getNode(ISD::TRUNCATE, dl, TmpVT, Res); |
31437 | |
31438 | |
31439 | unsigned NumConcats = 128 / TmpVT.getSizeInBits(); |
31440 | MVT ConcatVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8 * NumConcats); |
31441 | SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT)); |
31442 | ConcatOps[0] = Res; |
31443 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps); |
31444 | } |
31445 | |
31446 | Results.push_back(Res); |
31447 | if (IsStrict) |
31448 | Results.push_back(Chain); |
31449 | |
31450 | return; |
31451 | } |
31452 | |
31453 | if (VT.isVector() && VT.getScalarSizeInBits() < 32) { |
31454 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
31455 | "Unexpected type action!"); |
31456 | |
31457 | |
31458 | unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U); |
31459 | MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth), |
31460 | VT.getVectorNumElements()); |
31461 | SDValue Res; |
31462 | SDValue Chain; |
31463 | if (IsStrict) { |
31464 | Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {PromoteVT, MVT::Other}, |
31465 | {N->getOperand(0), Src}); |
31466 | Chain = Res.getValue(1); |
31467 | } else |
31468 | Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src); |
31469 | |
31470 | |
31471 | |
31472 | if (PromoteVT == MVT::v2i32) |
31473 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res, |
31474 | DAG.getUNDEF(MVT::v2i32)); |
31475 | |
31476 | Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl, |
31477 | Res.getValueType(), Res, |
31478 | DAG.getValueType(VT.getVectorElementType())); |
31479 | |
31480 | if (PromoteVT == MVT::v2i32) |
31481 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, |
31482 | DAG.getIntPtrConstant(0, dl)); |
31483 | |
31484 | |
31485 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
31486 | |
31487 | |
31488 | unsigned NumConcats = 128 / VT.getSizeInBits(); |
31489 | MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(), |
31490 | VT.getVectorNumElements() * NumConcats); |
31491 | SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); |
31492 | ConcatOps[0] = Res; |
31493 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps); |
31494 | Results.push_back(Res); |
31495 | if (IsStrict) |
31496 | Results.push_back(Chain); |
31497 | return; |
31498 | } |
31499 | |
31500 | |
31501 | if (VT == MVT::v2i32) { |
31502 | assert((!IsStrict || IsSigned || Subtarget.hasAVX512()) && |
31503 | "Strict unsigned conversion requires AVX512"); |
31504 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
31505 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
31506 | "Unexpected type action!"); |
31507 | if (Src.getValueType() == MVT::v2f64) { |
31508 | if (!IsSigned && !Subtarget.hasAVX512()) { |
31509 | SDValue Res = |
31510 | expandFP_TO_UINT_SSE(MVT::v4i32, Src, dl, DAG, Subtarget); |
31511 | Results.push_back(Res); |
31512 | return; |
31513 | } |
31514 | |
31515 | unsigned Opc; |
31516 | if (IsStrict) |
31517 | Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
31518 | else |
31519 | Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
31520 | |
31521 | |
31522 | if (!IsSigned && !Subtarget.hasVLX()) { |
31523 | |
31524 | |
31525 | |
31526 | |
31527 | |
31528 | if (!IsStrict) |
31529 | return; |
31530 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src, |
31531 | DAG.getConstantFP(0.0, dl, MVT::v2f64)); |
31532 | Opc = N->getOpcode(); |
31533 | } |
31534 | SDValue Res; |
31535 | SDValue Chain; |
31536 | if (IsStrict) { |
31537 | Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other}, |
31538 | {N->getOperand(0), Src}); |
31539 | Chain = Res.getValue(1); |
31540 | } else { |
31541 | Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); |
31542 | } |
31543 | Results.push_back(Res); |
31544 | if (IsStrict) |
31545 | Results.push_back(Chain); |
31546 | return; |
31547 | } |
31548 | |
31549 | |
31550 | |
31551 | if (Src.getValueType() == MVT::v2f32 && IsStrict) { |
31552 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, |
31553 | DAG.getConstantFP(0.0, dl, MVT::v2f32)); |
31554 | SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other}, |
31555 | {N->getOperand(0), Src}); |
31556 | Results.push_back(Res); |
31557 | Results.push_back(Res.getValue(1)); |
31558 | return; |
31559 | } |
31560 | |
31561 | |
31562 | |
31563 | return; |
31564 | } |
31565 | |
31566 | assert(!VT.isVector() && "Vectors should have been handled above!"); |
31567 | |
31568 | if ((Subtarget.hasDQI() && VT == MVT::i64 && |
31569 | (SrcVT == MVT::f32 || SrcVT == MVT::f64)) || |
31570 | (Subtarget.hasFP16() && SrcVT == MVT::f16)) { |
31571 | assert(!Subtarget.is64Bit() && "i64 should be legal"); |
31572 | unsigned NumElts = Subtarget.hasVLX() ? 2 : 8; |
31573 | |
31574 | unsigned SrcElts = |
31575 | std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits()); |
31576 | MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts); |
31577 | MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts); |
31578 | unsigned Opc = N->getOpcode(); |
31579 | if (NumElts != SrcElts) { |
31580 | if (IsStrict) |
31581 | Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
31582 | else |
31583 | Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
31584 | } |
31585 | |
31586 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); |
31587 | SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, |
31588 | DAG.getConstantFP(0.0, dl, VecInVT), Src, |
31589 | ZeroIdx); |
31590 | SDValue Chain; |
31591 | if (IsStrict) { |
31592 | SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); |
31593 | Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res); |
31594 | Chain = Res.getValue(1); |
31595 | } else |
31596 | Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res); |
31597 | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); |
31598 | Results.push_back(Res); |
31599 | if (IsStrict) |
31600 | Results.push_back(Chain); |
31601 | return; |
31602 | } |
31603 | |
31604 | SDValue Chain; |
31605 | if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) { |
31606 | Results.push_back(V); |
31607 | if (IsStrict) |
31608 | Results.push_back(Chain); |
31609 | } |
31610 | return; |
31611 | } |
31612 | case ISD::LRINT: |
31613 | case ISD::LLRINT: { |
31614 | if (SDValue V = LRINT_LLRINTHelper(N, DAG)) |
31615 | Results.push_back(V); |
31616 | return; |
31617 | } |
31618 | |
31619 | case ISD::SINT_TO_FP: |
31620 | case ISD::STRICT_SINT_TO_FP: |
31621 | case ISD::UINT_TO_FP: |
31622 | case ISD::STRICT_UINT_TO_FP: { |
31623 | bool IsStrict = N->isStrictFPOpcode(); |
31624 | bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || |
31625 | N->getOpcode() == ISD::STRICT_SINT_TO_FP; |
31626 | EVT VT = N->getValueType(0); |
31627 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
31628 | if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() && |
31629 | Subtarget.hasVLX()) { |
31630 | if (Src.getValueType().getVectorElementType() == MVT::i16) |
31631 | return; |
31632 | |
31633 | if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32) |
31634 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, |
31635 | IsStrict ? DAG.getConstant(0, dl, MVT::v2i32) |
31636 | : DAG.getUNDEF(MVT::v2i32)); |
31637 | if (IsStrict) { |
31638 | unsigned Opc = |
31639 | IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P; |
31640 | SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other}, |
31641 | {N->getOperand(0), Src}); |
31642 | Results.push_back(Res); |
31643 | Results.push_back(Res.getValue(1)); |
31644 | } else { |
31645 | unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P; |
31646 | Results.push_back(DAG.getNode(Opc, dl, MVT::v8f16, Src)); |
31647 | } |
31648 | return; |
31649 | } |
31650 | if (VT != MVT::v2f32) |
31651 | return; |
31652 | EVT SrcVT = Src.getValueType(); |
31653 | if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) { |
31654 | if (IsStrict) { |
31655 | unsigned Opc = IsSigned ? X86ISD::STRICT_CVTSI2P |
31656 | : X86ISD::STRICT_CVTUI2P; |
31657 | SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other}, |
31658 | {N->getOperand(0), Src}); |
31659 | Results.push_back(Res); |
31660 | Results.push_back(Res.getValue(1)); |
31661 | } else { |
31662 | unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P; |
31663 | Results.push_back(DAG.getNode(Opc, dl, MVT::v4f32, Src)); |
31664 | } |
31665 | return; |
31666 | } |
31667 | if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() && |
31668 | Subtarget.hasSSE41() && !Subtarget.hasAVX512()) { |
31669 | SDValue Zero = DAG.getConstant(0, dl, SrcVT); |
31670 | SDValue One = DAG.getConstant(1, dl, SrcVT); |
31671 | SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT, |
31672 | DAG.getNode(ISD::SRL, dl, SrcVT, Src, One), |
31673 | DAG.getNode(ISD::AND, dl, SrcVT, Src, One)); |
31674 | SDValue IsNeg = DAG.getSetCC(dl, MVT::v2i64, Src, Zero, ISD::SETLT); |
31675 | SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src); |
31676 | SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32)); |
31677 | for (int i = 0; i != 2; ++i) { |
31678 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, |
31679 | SignSrc, DAG.getIntPtrConstant(i, dl)); |
31680 | if (IsStrict) |
31681 | SignCvts[i] = |
31682 | DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {MVT::f32, MVT::Other}, |
31683 | {N->getOperand(0), Elt}); |
31684 | else |
31685 | SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Elt); |
31686 | }; |
31687 | SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts); |
31688 | SDValue Slow, Chain; |
31689 | if (IsStrict) { |
31690 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
31691 | SignCvts[0].getValue(1), SignCvts[1].getValue(1)); |
31692 | Slow = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v4f32, MVT::Other}, |
31693 | {Chain, SignCvt, SignCvt}); |
31694 | Chain = Slow.getValue(1); |
31695 | } else { |
31696 | Slow = DAG.getNode(ISD::FADD, dl, MVT::v4f32, SignCvt, SignCvt); |
31697 | } |
31698 | IsNeg = DAG.getBitcast(MVT::v4i32, IsNeg); |
31699 | IsNeg = |
31700 | DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1}); |
31701 | SDValue Cvt = DAG.getSelect(dl, MVT::v4f32, IsNeg, Slow, SignCvt); |
31702 | Results.push_back(Cvt); |
31703 | if (IsStrict) |
31704 | Results.push_back(Chain); |
31705 | return; |
31706 | } |
31707 | |
31708 | if (SrcVT != MVT::v2i32) |
31709 | return; |
31710 | |
31711 | if (IsSigned || Subtarget.hasAVX512()) { |
31712 | if (!IsStrict) |
31713 | return; |
31714 | |
31715 | |
31716 | |
31717 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, |
31718 | DAG.getConstant(0, dl, MVT::v2i32)); |
31719 | SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other}, |
31720 | {N->getOperand(0), Src}); |
31721 | Results.push_back(Res); |
31722 | Results.push_back(Res.getValue(1)); |
31723 | return; |
31724 | } |
31725 | |
31726 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
31727 | SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src); |
31728 | SDValue VBias = |
31729 | DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64); |
31730 | SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, |
31731 | DAG.getBitcast(MVT::v2i64, VBias)); |
31732 | Or = DAG.getBitcast(MVT::v2f64, Or); |
31733 | if (IsStrict) { |
31734 | SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other}, |
31735 | {N->getOperand(0), Or, VBias}); |
31736 | SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, |
31737 | {MVT::v4f32, MVT::Other}, |
31738 | {Sub.getValue(1), Sub}); |
31739 | Results.push_back(Res); |
31740 | Results.push_back(Res.getValue(1)); |
31741 | } else { |
31742 | |
31743 | SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); |
31744 | Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); |
31745 | } |
31746 | return; |
31747 | } |
31748 | case ISD::STRICT_FP_ROUND: |
31749 | case ISD::FP_ROUND: { |
31750 | bool IsStrict = N->isStrictFPOpcode(); |
31751 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
31752 | EVT VT = N->getValueType(0); |
31753 | EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32; |
31754 | if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) { |
31755 | SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32) |
31756 | : DAG.getUNDEF(MVT::v2f32); |
31757 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext); |
31758 | } |
31759 | if (!isTypeLegal(Src.getValueType())) |
31760 | return; |
31761 | SDValue V; |
31762 | if (IsStrict) |
31763 | V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other}, |
31764 | {N->getOperand(0), Src}); |
31765 | else |
31766 | V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src); |
31767 | Results.push_back(V); |
31768 | if (IsStrict) |
31769 | Results.push_back(V.getValue(1)); |
31770 | return; |
31771 | } |
31772 | case ISD::FP_EXTEND: |
31773 | case ISD::STRICT_FP_EXTEND: { |
31774 | |
31775 | |
31776 | assert(N->getValueType(0) == MVT::v2f32 && |
31777 | "Do not know how to legalize this Node"); |
31778 | if (!Subtarget.hasFP16() || !Subtarget.hasVLX()) |
31779 | return; |
31780 | bool IsStrict = N->isStrictFPOpcode(); |
31781 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
31782 | SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16) |
31783 | : DAG.getUNDEF(MVT::v2f16); |
31784 | SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext); |
31785 | if (IsStrict) |
31786 | V = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::v4f32, MVT::Other}, |
31787 | {N->getOperand(0), V}); |
31788 | else |
31789 | V = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, V); |
31790 | Results.push_back(V); |
31791 | if (IsStrict) |
31792 | Results.push_back(V.getValue(1)); |
31793 | return; |
31794 | } |
31795 | case ISD::INTRINSIC_W_CHAIN: { |
31796 | unsigned IntNo = N->getConstantOperandVal(1); |
31797 | switch (IntNo) { |
31798 | default : llvm_unreachable("Do not know how to custom type " |
31799 | "legalize this intrinsic operation!"); |
31800 | case Intrinsic::x86_rdtsc: |
31801 | return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, |
31802 | Results); |
31803 | case Intrinsic::x86_rdtscp: |
31804 | return getReadTimeStampCounter(N, dl, X86::RDTSCP, DAG, Subtarget, |
31805 | Results); |
31806 | case Intrinsic::x86_rdpmc: |
31807 | expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget, |
31808 | Results); |
31809 | return; |
31810 | case Intrinsic::x86_xgetbv: |
31811 | expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget, |
31812 | Results); |
31813 | return; |
31814 | } |
31815 | } |
31816 | case ISD::READCYCLECOUNTER: { |
31817 | return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results); |
31818 | } |
31819 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { |
31820 | EVT T = N->getValueType(0); |
31821 | assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); |
31822 | bool Regs64bit = T == MVT::i128; |
31823 | assert((!Regs64bit || Subtarget.hasCmpxchg16b()) && |
31824 | "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B"); |
31825 | MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; |
31826 | SDValue cpInL, cpInH; |
31827 | cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), |
31828 | DAG.getConstant(0, dl, HalfT)); |
31829 | cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), |
31830 | DAG.getConstant(1, dl, HalfT)); |
31831 | cpInL = DAG.getCopyToReg(N->getOperand(0), dl, |
31832 | Regs64bit ? X86::RAX : X86::EAX, |
31833 | cpInL, SDValue()); |
31834 | cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, |
31835 | Regs64bit ? X86::RDX : X86::EDX, |
31836 | cpInH, cpInL.getValue(1)); |
31837 | SDValue swapInL, swapInH; |
31838 | swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), |
31839 | DAG.getConstant(0, dl, HalfT)); |
31840 | swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), |
31841 | DAG.getConstant(1, dl, HalfT)); |
31842 | swapInH = |
31843 | DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX, |
31844 | swapInH, cpInH.getValue(1)); |
31845 | |
31846 | |
31847 | |
31848 | |
31849 | |
31850 | |
31851 | |
31852 | SDValue Result; |
31853 | SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); |
31854 | MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); |
31855 | if (Regs64bit) { |
31856 | SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL, |
31857 | swapInH.getValue(1)}; |
31858 | Result = |
31859 | DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_DAG, dl, Tys, Ops, T, MMO); |
31860 | } else { |
31861 | swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, X86::EBX, swapInL, |
31862 | swapInH.getValue(1)); |
31863 | SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1), |
31864 | swapInL.getValue(1)}; |
31865 | Result = |
31866 | DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, T, MMO); |
31867 | } |
31868 | |
31869 | SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, |
31870 | Regs64bit ? X86::RAX : X86::EAX, |
31871 | HalfT, Result.getValue(1)); |
31872 | SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, |
31873 | Regs64bit ? X86::RDX : X86::EDX, |
31874 | HalfT, cpOutL.getValue(2)); |
31875 | SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; |
31876 | |
31877 | SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, |
31878 | MVT::i32, cpOutH.getValue(2)); |
31879 | SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG); |
31880 | Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); |
31881 | |
31882 | Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); |
31883 | Results.push_back(Success); |
31884 | Results.push_back(EFLAGS.getValue(1)); |
31885 | return; |
31886 | } |
31887 | case ISD::ATOMIC_LOAD: { |
31888 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
31889 | bool NoImplicitFloatOps = |
31890 | DAG.getMachineFunction().getFunction().hasFnAttribute( |
31891 | Attribute::NoImplicitFloat); |
31892 | if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) { |
31893 | auto *Node = cast<AtomicSDNode>(N); |
31894 | if (Subtarget.hasSSE1()) { |
31895 | |
31896 | |
31897 | MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32; |
31898 | SDVTList Tys = DAG.getVTList(LdVT, MVT::Other); |
31899 | SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; |
31900 | SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, |
31901 | MVT::i64, Node->getMemOperand()); |
31902 | if (Subtarget.hasSSE2()) { |
31903 | SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld, |
31904 | DAG.getIntPtrConstant(0, dl)); |
31905 | Results.push_back(Res); |
31906 | Results.push_back(Ld.getValue(1)); |
31907 | return; |
31908 | } |
31909 | |
31910 | |
31911 | |
31912 | SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld, |
31913 | DAG.getIntPtrConstant(0, dl)); |
31914 | Res = DAG.getBitcast(MVT::i64, Res); |
31915 | Results.push_back(Res); |
31916 | Results.push_back(Ld.getValue(1)); |
31917 | return; |
31918 | } |
31919 | if (Subtarget.hasX87()) { |
31920 | |
31921 | |
31922 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
31923 | SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; |
31924 | SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD, |
31925 | dl, Tys, Ops, MVT::i64, |
31926 | Node->getMemOperand()); |
31927 | SDValue Chain = Result.getValue(1); |
31928 | |
31929 | |
31930 | |
31931 | |
31932 | |
31933 | SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64); |
31934 | int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); |
31935 | MachinePointerInfo MPI = |
31936 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); |
31937 | SDValue StoreOps[] = { Chain, Result, StackPtr }; |
31938 | Chain = DAG.getMemIntrinsicNode( |
31939 | X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, |
31940 | MPI, None , MachineMemOperand::MOStore); |
31941 | |
31942 | |
31943 | |
31944 | |
31945 | Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI); |
31946 | Results.push_back(Result); |
31947 | Results.push_back(Result.getValue(1)); |
31948 | return; |
31949 | } |
31950 | } |
31951 | |
31952 | |
31953 | |
31954 | break; |
31955 | } |
31956 | case ISD::ATOMIC_SWAP: |
31957 | case ISD::ATOMIC_LOAD_ADD: |
31958 | case ISD::ATOMIC_LOAD_SUB: |
31959 | case ISD::ATOMIC_LOAD_AND: |
31960 | case ISD::ATOMIC_LOAD_OR: |
31961 | case ISD::ATOMIC_LOAD_XOR: |
31962 | case ISD::ATOMIC_LOAD_NAND: |
31963 | case ISD::ATOMIC_LOAD_MIN: |
31964 | case ISD::ATOMIC_LOAD_MAX: |
31965 | case ISD::ATOMIC_LOAD_UMIN: |
31966 | case ISD::ATOMIC_LOAD_UMAX: |
31967 | |
31968 | |
31969 | break; |
31970 | |
31971 | case ISD::BITCAST: { |
31972 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
31973 | EVT DstVT = N->getValueType(0); |
31974 | EVT SrcVT = N->getOperand(0).getValueType(); |
31975 | |
31976 | |
31977 | |
31978 | if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) { |
31979 | assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); |
31980 | SDValue Lo, Hi; |
31981 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); |
31982 | Lo = DAG.getBitcast(MVT::i32, Lo); |
31983 | Hi = DAG.getBitcast(MVT::i32, Hi); |
31984 | SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); |
31985 | Results.push_back(Res); |
31986 | return; |
31987 | } |
31988 | |
31989 | if (DstVT.isVector() && SrcVT == MVT::x86mmx) { |
31990 | |
31991 | assert(Subtarget.hasSSE2() && "Requires SSE2"); |
31992 | assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector && |
31993 | "Unexpected type action!"); |
31994 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT); |
31995 | SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, |
31996 | N->getOperand(0)); |
31997 | Res = DAG.getBitcast(WideVT, Res); |
31998 | Results.push_back(Res); |
31999 | return; |
32000 | } |
32001 | |
32002 | return; |
32003 | } |
32004 | case ISD::MGATHER: { |
32005 | EVT VT = N->getValueType(0); |
32006 | if ((VT == MVT::v2f32 || VT == MVT::v2i32) && |
32007 | (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { |
32008 | auto *Gather = cast<MaskedGatherSDNode>(N); |
32009 | SDValue Index = Gather->getIndex(); |
32010 | if (Index.getValueType() != MVT::v2i64) |
32011 | return; |
32012 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
32013 | "Unexpected type action!"); |
32014 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); |
32015 | SDValue Mask = Gather->getMask(); |
32016 | assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); |
32017 | SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, |
32018 | Gather->getPassThru(), |
32019 | DAG.getUNDEF(VT)); |
32020 | if (!Subtarget.hasVLX()) { |
32021 | |
32022 | |
32023 | Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, |
32024 | DAG.getUNDEF(MVT::v2i1)); |
32025 | Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); |
32026 | } |
32027 | SDValue Ops[] = { Gather->getChain(), PassThru, Mask, |
32028 | Gather->getBasePtr(), Index, Gather->getScale() }; |
32029 | SDValue Res = DAG.getMemIntrinsicNode( |
32030 | X86ISD::MGATHER, dl, DAG.getVTList(WideVT, MVT::Other), Ops, |
32031 | Gather->getMemoryVT(), Gather->getMemOperand()); |
32032 | Results.push_back(Res); |
32033 | Results.push_back(Res.getValue(1)); |
32034 | return; |
32035 | } |
32036 | return; |
32037 | } |
32038 | case ISD::LOAD: { |
32039 | |
32040 | |
32041 | |
32042 | MVT VT = N->getSimpleValueType(0); |
32043 | assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT"); |
32044 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
32045 | "Unexpected type action!"); |
32046 | if (!ISD::isNON_EXTLoad(N)) |
32047 | return; |
32048 | auto *Ld = cast<LoadSDNode>(N); |
32049 | if (Subtarget.hasSSE2()) { |
32050 | MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64; |
32051 | SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), |
32052 | Ld->getPointerInfo(), Ld->getOriginalAlign(), |
32053 | Ld->getMemOperand()->getFlags()); |
32054 | SDValue Chain = Res.getValue(1); |
32055 | MVT VecVT = MVT::getVectorVT(LdVT, 2); |
32056 | Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res); |
32057 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); |
32058 | Res = DAG.getBitcast(WideVT, Res); |
32059 | Results.push_back(Res); |
32060 | Results.push_back(Chain); |
32061 | return; |
32062 | } |
32063 | assert(Subtarget.hasSSE1() && "Expected SSE"); |
32064 | SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other); |
32065 | SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()}; |
32066 | SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, |
32067 | MVT::i64, Ld->getMemOperand()); |
32068 | Results.push_back(Res); |
32069 | Results.push_back(Res.getValue(1)); |
32070 | return; |
32071 | } |
32072 | case ISD::ADDRSPACECAST: { |
32073 | SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG); |
32074 | Results.push_back(V); |
32075 | return; |
32076 | } |
32077 | case ISD::BITREVERSE: |
32078 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
32079 | assert(Subtarget.hasXOP() && "Expected XOP"); |
32080 | |
32081 | |
32082 | Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG)); |
32083 | return; |
32084 | } |
32085 | } |
32086 | |
/// Return a human-readable name for the given X86-specific SelectionDAG
/// opcode, used by SelectionDAG debug dumps.
///
/// \param Opcode an opcode value, expected to be one of X86ISD::NodeType.
/// \return a static string "X86ISD::<NAME>", or nullptr when \p Opcode is not
///         a recognized X86ISD node (the generic printer then falls back to
///         its own tables).
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((X86ISD::NodeType)Opcode) {
  // FIRST_NUMBER is a range marker, not a real node; fall through to nullptr.
  case X86ISD::FIRST_NUMBER: break;
// Each entry expands to: case X86ISD::NODE: return "X86ISD::" #NODE;
#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
  NODE_NAME_CASE(BSF)
  NODE_NAME_CASE(BSR)
  NODE_NAME_CASE(FSHL)
  NODE_NAME_CASE(FSHR)
  NODE_NAME_CASE(FAND)
  NODE_NAME_CASE(FANDN)
  NODE_NAME_CASE(FOR)
  NODE_NAME_CASE(FXOR)
  NODE_NAME_CASE(FILD)
  NODE_NAME_CASE(FIST)
  NODE_NAME_CASE(FP_TO_INT_IN_MEM)
  NODE_NAME_CASE(FLD)
  NODE_NAME_CASE(FST)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(CALL_RVMARKER)
  NODE_NAME_CASE(BT)
  NODE_NAME_CASE(CMP)
  NODE_NAME_CASE(FCMP)
  NODE_NAME_CASE(STRICT_FCMP)
  NODE_NAME_CASE(STRICT_FCMPS)
  NODE_NAME_CASE(COMI)
  NODE_NAME_CASE(UCOMI)
  NODE_NAME_CASE(CMPM)
  NODE_NAME_CASE(CMPMM)
  NODE_NAME_CASE(STRICT_CMPM)
  NODE_NAME_CASE(CMPMM_SAE)
  NODE_NAME_CASE(SETCC)
  NODE_NAME_CASE(SETCC_CARRY)
  NODE_NAME_CASE(FSETCC)
  NODE_NAME_CASE(FSETCCM)
  NODE_NAME_CASE(FSETCCM_SAE)
  NODE_NAME_CASE(CMOV)
  NODE_NAME_CASE(BRCOND)
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(IRET)
  NODE_NAME_CASE(REP_STOS)
  NODE_NAME_CASE(REP_MOVS)
  NODE_NAME_CASE(GlobalBaseReg)
  NODE_NAME_CASE(Wrapper)
  NODE_NAME_CASE(WrapperRIP)
  NODE_NAME_CASE(MOVQ2DQ)
  NODE_NAME_CASE(MOVDQ2Q)
  NODE_NAME_CASE(MMX_MOVD2W)
  NODE_NAME_CASE(MMX_MOVW2D)
  NODE_NAME_CASE(PEXTRB)
  NODE_NAME_CASE(PEXTRW)
  NODE_NAME_CASE(INSERTPS)
  NODE_NAME_CASE(PINSRB)
  NODE_NAME_CASE(PINSRW)
  NODE_NAME_CASE(PSHUFB)
  NODE_NAME_CASE(ANDNP)
  NODE_NAME_CASE(BLENDI)
  NODE_NAME_CASE(BLENDV)
  NODE_NAME_CASE(HADD)
  NODE_NAME_CASE(HSUB)
  NODE_NAME_CASE(FHADD)
  NODE_NAME_CASE(FHSUB)
  NODE_NAME_CASE(CONFLICT)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(FMAXS)
  NODE_NAME_CASE(FMAX_SAE)
  NODE_NAME_CASE(FMAXS_SAE)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(FMINS)
  NODE_NAME_CASE(FMIN_SAE)
  NODE_NAME_CASE(FMINS_SAE)
  NODE_NAME_CASE(FMAXC)
  NODE_NAME_CASE(FMINC)
  NODE_NAME_CASE(FRSQRT)
  NODE_NAME_CASE(FRCP)
  NODE_NAME_CASE(EXTRQI)
  NODE_NAME_CASE(INSERTQI)
  NODE_NAME_CASE(TLSADDR)
  NODE_NAME_CASE(TLSBASEADDR)
  NODE_NAME_CASE(TLSCALL)
  NODE_NAME_CASE(EH_SJLJ_SETJMP)
  NODE_NAME_CASE(EH_SJLJ_LONGJMP)
  NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
  NODE_NAME_CASE(EH_RETURN)
  NODE_NAME_CASE(TC_RETURN)
  NODE_NAME_CASE(FNSTCW16m)
  NODE_NAME_CASE(FLDCW16m)
  NODE_NAME_CASE(LCMPXCHG_DAG)
  NODE_NAME_CASE(LCMPXCHG8_DAG)
  NODE_NAME_CASE(LCMPXCHG16_DAG)
  NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
  NODE_NAME_CASE(LADD)
  NODE_NAME_CASE(LSUB)
  NODE_NAME_CASE(LOR)
  NODE_NAME_CASE(LXOR)
  NODE_NAME_CASE(LAND)
  NODE_NAME_CASE(VZEXT_MOVL)
  NODE_NAME_CASE(VZEXT_LOAD)
  NODE_NAME_CASE(VEXTRACT_STORE)
  NODE_NAME_CASE(VTRUNC)
  NODE_NAME_CASE(VTRUNCS)
  NODE_NAME_CASE(VTRUNCUS)
  NODE_NAME_CASE(VMTRUNC)
  NODE_NAME_CASE(VMTRUNCS)
  NODE_NAME_CASE(VMTRUNCUS)
  NODE_NAME_CASE(VTRUNCSTORES)
  NODE_NAME_CASE(VTRUNCSTOREUS)
  NODE_NAME_CASE(VMTRUNCSTORES)
  NODE_NAME_CASE(VMTRUNCSTOREUS)
  NODE_NAME_CASE(VFPEXT)
  NODE_NAME_CASE(STRICT_VFPEXT)
  NODE_NAME_CASE(VFPEXT_SAE)
  NODE_NAME_CASE(VFPEXTS)
  NODE_NAME_CASE(VFPEXTS_SAE)
  NODE_NAME_CASE(VFPROUND)
  NODE_NAME_CASE(STRICT_VFPROUND)
  NODE_NAME_CASE(VMFPROUND)
  NODE_NAME_CASE(VFPROUND_RND)
  NODE_NAME_CASE(VFPROUNDS)
  NODE_NAME_CASE(VFPROUNDS_RND)
  NODE_NAME_CASE(VSHLDQ)
  NODE_NAME_CASE(VSRLDQ)
  NODE_NAME_CASE(VSHL)
  NODE_NAME_CASE(VSRL)
  NODE_NAME_CASE(VSRA)
  NODE_NAME_CASE(VSHLI)
  NODE_NAME_CASE(VSRLI)
  NODE_NAME_CASE(VSRAI)
  NODE_NAME_CASE(VSHLV)
  NODE_NAME_CASE(VSRLV)
  NODE_NAME_CASE(VSRAV)
  NODE_NAME_CASE(VROTLI)
  NODE_NAME_CASE(VROTRI)
  NODE_NAME_CASE(VPPERM)
  NODE_NAME_CASE(CMPP)
  NODE_NAME_CASE(STRICT_CMPP)
  NODE_NAME_CASE(PCMPEQ)
  NODE_NAME_CASE(PCMPGT)
  NODE_NAME_CASE(PHMINPOS)
  NODE_NAME_CASE(ADD)
  NODE_NAME_CASE(SUB)
  NODE_NAME_CASE(ADC)
  NODE_NAME_CASE(SBB)
  NODE_NAME_CASE(SMUL)
  NODE_NAME_CASE(UMUL)
  NODE_NAME_CASE(OR)
  NODE_NAME_CASE(XOR)
  NODE_NAME_CASE(AND)
  NODE_NAME_CASE(BEXTR)
  NODE_NAME_CASE(BEXTRI)
  NODE_NAME_CASE(BZHI)
  NODE_NAME_CASE(PDEP)
  NODE_NAME_CASE(PEXT)
  NODE_NAME_CASE(MUL_IMM)
  NODE_NAME_CASE(MOVMSK)
  NODE_NAME_CASE(PTEST)
  NODE_NAME_CASE(TESTP)
  NODE_NAME_CASE(KORTEST)
  NODE_NAME_CASE(KTEST)
  NODE_NAME_CASE(KADD)
  NODE_NAME_CASE(KSHIFTL)
  NODE_NAME_CASE(KSHIFTR)
  NODE_NAME_CASE(PACKSS)
  NODE_NAME_CASE(PACKUS)
  NODE_NAME_CASE(PALIGNR)
  NODE_NAME_CASE(VALIGN)
  NODE_NAME_CASE(VSHLD)
  NODE_NAME_CASE(VSHRD)
  NODE_NAME_CASE(VSHLDV)
  NODE_NAME_CASE(VSHRDV)
  NODE_NAME_CASE(PSHUFD)
  NODE_NAME_CASE(PSHUFHW)
  NODE_NAME_CASE(PSHUFLW)
  NODE_NAME_CASE(SHUFP)
  NODE_NAME_CASE(SHUF128)
  NODE_NAME_CASE(MOVLHPS)
  NODE_NAME_CASE(MOVHLPS)
  NODE_NAME_CASE(MOVDDUP)
  NODE_NAME_CASE(MOVSHDUP)
  NODE_NAME_CASE(MOVSLDUP)
  NODE_NAME_CASE(MOVSD)
  NODE_NAME_CASE(MOVSS)
  NODE_NAME_CASE(MOVSH)
  NODE_NAME_CASE(UNPCKL)
  NODE_NAME_CASE(UNPCKH)
  NODE_NAME_CASE(VBROADCAST)
  NODE_NAME_CASE(VBROADCAST_LOAD)
  NODE_NAME_CASE(VBROADCASTM)
  NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
  NODE_NAME_CASE(VPERMILPV)
  NODE_NAME_CASE(VPERMILPI)
  NODE_NAME_CASE(VPERM2X128)
  NODE_NAME_CASE(VPERMV)
  NODE_NAME_CASE(VPERMV3)
  NODE_NAME_CASE(VPERMI)
  NODE_NAME_CASE(VPTERNLOG)
  NODE_NAME_CASE(VFIXUPIMM)
  NODE_NAME_CASE(VFIXUPIMM_SAE)
  NODE_NAME_CASE(VFIXUPIMMS)
  NODE_NAME_CASE(VFIXUPIMMS_SAE)
  NODE_NAME_CASE(VRANGE)
  NODE_NAME_CASE(VRANGE_SAE)
  NODE_NAME_CASE(VRANGES)
  NODE_NAME_CASE(VRANGES_SAE)
  NODE_NAME_CASE(PMULUDQ)
  NODE_NAME_CASE(PMULDQ)
  NODE_NAME_CASE(PSADBW)
  NODE_NAME_CASE(DBPSADBW)
  NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
  NODE_NAME_CASE(VAARG_64)
  NODE_NAME_CASE(VAARG_X32)
  NODE_NAME_CASE(WIN_ALLOCA)
  NODE_NAME_CASE(MEMBARRIER)
  NODE_NAME_CASE(MFENCE)
  NODE_NAME_CASE(SEG_ALLOCA)
  NODE_NAME_CASE(PROBED_ALLOCA)
  NODE_NAME_CASE(RDRAND)
  NODE_NAME_CASE(RDSEED)
  NODE_NAME_CASE(RDPKRU)
  NODE_NAME_CASE(WRPKRU)
  NODE_NAME_CASE(VPMADDUBSW)
  NODE_NAME_CASE(VPMADDWD)
  NODE_NAME_CASE(VPSHA)
  NODE_NAME_CASE(VPSHL)
  NODE_NAME_CASE(VPCOM)
  NODE_NAME_CASE(VPCOMU)
  NODE_NAME_CASE(VPERMIL2)
  NODE_NAME_CASE(FMSUB)
  NODE_NAME_CASE(STRICT_FMSUB)
  NODE_NAME_CASE(FNMADD)
  NODE_NAME_CASE(STRICT_FNMADD)
  NODE_NAME_CASE(FNMSUB)
  NODE_NAME_CASE(STRICT_FNMSUB)
  NODE_NAME_CASE(FMADDSUB)
  NODE_NAME_CASE(FMSUBADD)
  NODE_NAME_CASE(FMADD_RND)
  NODE_NAME_CASE(FNMADD_RND)
  NODE_NAME_CASE(FMSUB_RND)
  NODE_NAME_CASE(FNMSUB_RND)
  NODE_NAME_CASE(FMADDSUB_RND)
  NODE_NAME_CASE(FMSUBADD_RND)
  NODE_NAME_CASE(VFMADDC)
  NODE_NAME_CASE(VFMADDC_RND)
  NODE_NAME_CASE(VFCMADDC)
  NODE_NAME_CASE(VFCMADDC_RND)
  NODE_NAME_CASE(VFMULC)
  NODE_NAME_CASE(VFMULC_RND)
  NODE_NAME_CASE(VFCMULC)
  NODE_NAME_CASE(VFCMULC_RND)
  NODE_NAME_CASE(VFMULCSH)
  NODE_NAME_CASE(VFMULCSH_RND)
  NODE_NAME_CASE(VFCMULCSH)
  NODE_NAME_CASE(VFCMULCSH_RND)
  NODE_NAME_CASE(VFMADDCSH)
  NODE_NAME_CASE(VFMADDCSH_RND)
  NODE_NAME_CASE(VFCMADDCSH)
  NODE_NAME_CASE(VFCMADDCSH_RND)
  NODE_NAME_CASE(VPMADD52H)
  NODE_NAME_CASE(VPMADD52L)
  NODE_NAME_CASE(VRNDSCALE)
  NODE_NAME_CASE(STRICT_VRNDSCALE)
  NODE_NAME_CASE(VRNDSCALE_SAE)
  NODE_NAME_CASE(VRNDSCALES)
  NODE_NAME_CASE(VRNDSCALES_SAE)
  NODE_NAME_CASE(VREDUCE)
  NODE_NAME_CASE(VREDUCE_SAE)
  NODE_NAME_CASE(VREDUCES)
  NODE_NAME_CASE(VREDUCES_SAE)
  NODE_NAME_CASE(VGETMANT)
  NODE_NAME_CASE(VGETMANT_SAE)
  NODE_NAME_CASE(VGETMANTS)
  NODE_NAME_CASE(VGETMANTS_SAE)
  NODE_NAME_CASE(PCMPESTR)
  NODE_NAME_CASE(PCMPISTR)
  NODE_NAME_CASE(XTEST)
  NODE_NAME_CASE(COMPRESS)
  NODE_NAME_CASE(EXPAND)
  NODE_NAME_CASE(SELECTS)
  NODE_NAME_CASE(ADDSUB)
  NODE_NAME_CASE(RCP14)
  NODE_NAME_CASE(RCP14S)
  NODE_NAME_CASE(RCP28)
  NODE_NAME_CASE(RCP28_SAE)
  NODE_NAME_CASE(RCP28S)
  NODE_NAME_CASE(RCP28S_SAE)
  NODE_NAME_CASE(EXP2)
  NODE_NAME_CASE(EXP2_SAE)
  NODE_NAME_CASE(RSQRT14)
  NODE_NAME_CASE(RSQRT14S)
  NODE_NAME_CASE(RSQRT28)
  NODE_NAME_CASE(RSQRT28_SAE)
  NODE_NAME_CASE(RSQRT28S)
  NODE_NAME_CASE(RSQRT28S_SAE)
  NODE_NAME_CASE(FADD_RND)
  NODE_NAME_CASE(FADDS)
  NODE_NAME_CASE(FADDS_RND)
  NODE_NAME_CASE(FSUB_RND)
  NODE_NAME_CASE(FSUBS)
  NODE_NAME_CASE(FSUBS_RND)
  NODE_NAME_CASE(FMUL_RND)
  NODE_NAME_CASE(FMULS)
  NODE_NAME_CASE(FMULS_RND)
  NODE_NAME_CASE(FDIV_RND)
  NODE_NAME_CASE(FDIVS)
  NODE_NAME_CASE(FDIVS_RND)
  NODE_NAME_CASE(FSQRT_RND)
  NODE_NAME_CASE(FSQRTS)
  NODE_NAME_CASE(FSQRTS_RND)
  NODE_NAME_CASE(FGETEXP)
  NODE_NAME_CASE(FGETEXP_SAE)
  NODE_NAME_CASE(FGETEXPS)
  NODE_NAME_CASE(FGETEXPS_SAE)
  NODE_NAME_CASE(SCALEF)
  NODE_NAME_CASE(SCALEF_RND)
  NODE_NAME_CASE(SCALEFS)
  NODE_NAME_CASE(SCALEFS_RND)
  NODE_NAME_CASE(AVG)
  NODE_NAME_CASE(MULHRS)
  NODE_NAME_CASE(SINT_TO_FP_RND)
  NODE_NAME_CASE(UINT_TO_FP_RND)
  NODE_NAME_CASE(CVTTP2SI)
  NODE_NAME_CASE(CVTTP2UI)
  NODE_NAME_CASE(STRICT_CVTTP2SI)
  NODE_NAME_CASE(STRICT_CVTTP2UI)
  NODE_NAME_CASE(MCVTTP2SI)
  NODE_NAME_CASE(MCVTTP2UI)
  NODE_NAME_CASE(CVTTP2SI_SAE)
  NODE_NAME_CASE(CVTTP2UI_SAE)
  NODE_NAME_CASE(CVTTS2SI)
  NODE_NAME_CASE(CVTTS2UI)
  NODE_NAME_CASE(CVTTS2SI_SAE)
  NODE_NAME_CASE(CVTTS2UI_SAE)
  NODE_NAME_CASE(CVTSI2P)
  NODE_NAME_CASE(CVTUI2P)
  NODE_NAME_CASE(STRICT_CVTSI2P)
  NODE_NAME_CASE(STRICT_CVTUI2P)
  NODE_NAME_CASE(MCVTSI2P)
  NODE_NAME_CASE(MCVTUI2P)
  NODE_NAME_CASE(VFPCLASS)
  NODE_NAME_CASE(VFPCLASSS)
  NODE_NAME_CASE(MULTISHIFT)
  NODE_NAME_CASE(SCALAR_SINT_TO_FP)
  NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
  NODE_NAME_CASE(SCALAR_UINT_TO_FP)
  NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
  NODE_NAME_CASE(CVTPS2PH)
  NODE_NAME_CASE(STRICT_CVTPS2PH)
  NODE_NAME_CASE(MCVTPS2PH)
  NODE_NAME_CASE(CVTPH2PS)
  NODE_NAME_CASE(STRICT_CVTPH2PS)
  NODE_NAME_CASE(CVTPH2PS_SAE)
  NODE_NAME_CASE(CVTP2SI)
  NODE_NAME_CASE(CVTP2UI)
  NODE_NAME_CASE(MCVTP2SI)
  NODE_NAME_CASE(MCVTP2UI)
  NODE_NAME_CASE(CVTP2SI_RND)
  NODE_NAME_CASE(CVTP2UI_RND)
  NODE_NAME_CASE(CVTS2SI)
  NODE_NAME_CASE(CVTS2UI)
  NODE_NAME_CASE(CVTS2SI_RND)
  NODE_NAME_CASE(CVTS2UI_RND)
  NODE_NAME_CASE(CVTNE2PS2BF16)
  NODE_NAME_CASE(CVTNEPS2BF16)
  NODE_NAME_CASE(MCVTNEPS2BF16)
  NODE_NAME_CASE(DPBF16PS)
  NODE_NAME_CASE(LWPINS)
  NODE_NAME_CASE(MGATHER)
  NODE_NAME_CASE(MSCATTER)
  NODE_NAME_CASE(VPDPBUSD)
  NODE_NAME_CASE(VPDPBUSDS)
  NODE_NAME_CASE(VPDPWSSD)
  NODE_NAME_CASE(VPDPWSSDS)
  NODE_NAME_CASE(VPSHUFBITQMB)
  NODE_NAME_CASE(GF2P8MULB)
  NODE_NAME_CASE(GF2P8AFFINEQB)
  NODE_NAME_CASE(GF2P8AFFINEINVQB)
  NODE_NAME_CASE(NT_CALL)
  NODE_NAME_CASE(NT_BRIND)
  NODE_NAME_CASE(UMWAIT)
  NODE_NAME_CASE(TPAUSE)
  NODE_NAME_CASE(ENQCMD)
  NODE_NAME_CASE(ENQCMDS)
  NODE_NAME_CASE(VP2INTERSECT)
  NODE_NAME_CASE(AESENC128KL)
  NODE_NAME_CASE(AESDEC128KL)
  NODE_NAME_CASE(AESENC256KL)
  NODE_NAME_CASE(AESDEC256KL)
  NODE_NAME_CASE(AESENCWIDE128KL)
  NODE_NAME_CASE(AESDECWIDE128KL)
  NODE_NAME_CASE(AESENCWIDE256KL)
  NODE_NAME_CASE(AESDECWIDE256KL)
  NODE_NAME_CASE(TESTUI)
  }
  // Not an X86 node; let generic code name it.
  return nullptr;
#undef NODE_NAME_CASE
}
32482 | |
32483 | |
32484 | |
/// Return true if the addressing mode represented by AM is legal for this
/// target for a memory access of type Ty in address space AS.
///
/// X86 addressing modes are [base + scale*index + disp], where disp must fit
/// a sign-extended 32-bit field and scale must be 1, 2, 4 or 8 (3, 5 and 9
/// are accepted only when they can be formed as base + 2/4/8*index, i.e.
/// when no separate base register is present).
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // The legality of an offset depends on the active code model.
  CodeModel::Model M = getTargetMachine().getCodeModel();

  // The displacement must fit the 32-bit signed immediate field for this
  // code model (global references further restrict it).
  if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr))
    return false;

  if (AM.BaseGV) {
    unsigned GVFlags = Subtarget.classifyGlobalReference(AM.BaseGV);

    // If a reference to this global requires an extra load (e.g. through a
    // stub), it cannot be folded into the addressing mode.
    if (isGlobalStubReference(GVFlags))
      return false;

    // If the global is addressed relative to the PIC base register, that
    // register occupies the base slot, so no explicit base reg is allowed.
    if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
      return false;

    // Outside the small static code model on 64-bit, globals are addressed
    // RIP-relatively, which permits neither an extra offset nor a scale.
    if ((M != CodeModel::Small || isPositionIndependent()) &&
        Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1))
      return false;
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // Scales directly encodable in the SIB byte.
    break;
  case 3:
  case 5:
  case 9:
    // Formed as index + 2/4/8*index, which consumes the base-register slot;
    // only legal when no base register is requested.
    if (AM.HasBaseReg)
      return false;
    break;
  default:
    // Any other scale is unencodable.
    return false;
  }

  return true;
}
32536 | |
32537 | bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { |
32538 | unsigned Bits = Ty->getScalarSizeInBits(); |
32539 | |
32540 | |
32541 | |
32542 | if (Bits == 8) |
32543 | return false; |
32544 | |
32545 | |
32546 | |
32547 | if (Subtarget.hasXOP() && |
32548 | (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64)) |
32549 | return false; |
32550 | |
32551 | |
32552 | |
32553 | if (Subtarget.hasAVX2() && (Bits == 32 || Bits == 64)) |
32554 | return false; |
32555 | |
32556 | |
32557 | if (Subtarget.hasBWI() && Bits == 16) |
32558 | return false; |
32559 | |
32560 | |
32561 | |
32562 | return true; |
32563 | } |
32564 | |
32565 | bool X86TargetLowering::isBinOp(unsigned Opcode) const { |
32566 | switch (Opcode) { |
32567 | |
32568 | |
32569 | case X86ISD::ANDNP: |
32570 | case X86ISD::PCMPGT: |
32571 | case X86ISD::FMAX: |
32572 | case X86ISD::FMIN: |
32573 | case X86ISD::FANDN: |
32574 | return true; |
32575 | } |
32576 | |
32577 | return TargetLoweringBase::isBinOp(Opcode); |
32578 | } |
32579 | |
32580 | bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const { |
32581 | switch (Opcode) { |
32582 | |
32583 | case X86ISD::PCMPEQ: |
32584 | case X86ISD::PMULDQ: |
32585 | case X86ISD::PMULUDQ: |
32586 | case X86ISD::FMAXC: |
32587 | case X86ISD::FMINC: |
32588 | case X86ISD::FAND: |
32589 | case X86ISD::FOR: |
32590 | case X86ISD::FXOR: |
32591 | return true; |
32592 | } |
32593 | |
32594 | return TargetLoweringBase::isCommutativeBinOp(Opcode); |
32595 | } |
32596 | |
32597 | bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { |
32598 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
32599 | return false; |
32600 | unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); |
32601 | unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); |
32602 | return NumBits1 > NumBits2; |
32603 | } |
32604 | |
32605 | bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { |
32606 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
32607 | return false; |
32608 | |
32609 | if (!isTypeLegal(EVT::getEVT(Ty1))) |
32610 | return false; |
32611 | |
32612 | assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); |
32613 | |
32614 | |
32615 | |
32616 | return true; |
32617 | } |
32618 | |
32619 | bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
32620 | return isInt<32>(Imm); |
32621 | } |
32622 | |
32623 | bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const { |
32624 | |
32625 | return isInt<32>(Imm); |
32626 | } |
32627 | |
32628 | bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const { |
32629 | return isInt<32>(Imm); |
32630 | } |
32631 | |
32632 | bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { |
32633 | if (!VT1.isScalarInteger() || !VT2.isScalarInteger()) |
32634 | return false; |
32635 | unsigned NumBits1 = VT1.getSizeInBits(); |
32636 | unsigned NumBits2 = VT2.getSizeInBits(); |
32637 | return NumBits1 > NumBits2; |
32638 | } |
32639 | |
32640 | bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { |
32641 | |
32642 | return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit(); |
32643 | } |
32644 | |
32645 | bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { |
32646 | |
32647 | return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit(); |
32648 | } |
32649 | |
32650 | bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
32651 | EVT VT1 = Val.getValueType(); |
32652 | if (isZExtFree(VT1, VT2)) |
32653 | return true; |
32654 | |
32655 | if (Val.getOpcode() != ISD::LOAD) |
32656 | return false; |
32657 | |
32658 | if (!VT1.isSimple() || !VT1.isInteger() || |
32659 | !VT2.isSimple() || !VT2.isInteger()) |
32660 | return false; |
32661 | |
32662 | switch (VT1.getSimpleVT().SimpleTy) { |
32663 | default: break; |
32664 | case MVT::i8: |
32665 | case MVT::i16: |
32666 | case MVT::i32: |
32667 | |
32668 | return true; |
32669 | } |
32670 | |
32671 | return false; |
32672 | } |
32673 | |
/// Decide whether some operands of \p I should be sunk into I's basic block
/// so that instruction selection can fold them; matched uses are appended to
/// \p Ops.  Two patterns are recognized:
///  1. a 64-bit-element vector multiply whose operand is a sext-in-reg
///     (shl 32 + ashr 32) or a low-32-bit mask (and 0xffffffff) — presumably
///     so selection can form PMULDQ/PMULUDQ (TODO confirm against ISel);
///  2. a vector shift / funnel-shift whose amount operand is a splat
///     shuffle, when shift-by-scalar is cheap for the result type.
bool X86TargetLowering::shouldSinkOperands(Instruction *I,
                                           SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  // Only fixed-width vector instructions are of interest.
  FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
  if (!VTy)
    return false;

  if (I->getOpcode() == Instruction::Mul &&
      VTy->getElementType()->isIntegerTy(64)) {
    for (auto &Op : I->operands()) {
      // Skip operands already queued for sinking (e.g. when both operands of
      // the multiply are the same value).
      if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
        continue;

      // (ashr (shl X, 32), 32): sign-extension of the low 32 bits.  Sink
      // both the shl's input use and this operand (SSE4.1 only).
      if (Subtarget.hasSSE41() &&
          match(Op.get(), m_AShr(m_Shl(m_Value(), m_SpecificInt(32)),
                                 m_SpecificInt(32)))) {
        Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
        Ops.push_back(&Op);
      } else if (Subtarget.hasSSE2() &&
                 match(Op.get(),
                       m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
        // (and X, 0xffffffff): zero-extension of the low 32 bits.
        Ops.push_back(&Op);
      }
    }

    // Report success iff anything was queued.
    return !Ops.empty();
  }

  // Otherwise look for a shift whose amount operand we might sink.  Locate
  // the index of the shift-amount operand, if this is such an instruction.
  int ShiftAmountOpNum = -1;
  if (I->isShift())
    ShiftAmountOpNum = 1;
  else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    // Funnel shifts take the amount as their third operand.
    if (II->getIntrinsicID() == Intrinsic::fshl ||
        II->getIntrinsicID() == Intrinsic::fshr)
      ShiftAmountOpNum = 2;
  }

  if (ShiftAmountOpNum == -1)
    return false;

  // Sink a splat-shuffle shift amount when shift-by-scalar is cheap for the
  // result type (see isVectorShiftByScalarCheap).
  auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
  if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
      isVectorShiftByScalarCheap(I->getType())) {
    Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
    return true;
  }

  return false;
}
32730 | |
32731 | bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const { |
32732 | if (!Subtarget.is64Bit()) |
32733 | return false; |
32734 | return TargetLowering::shouldConvertPhiType(From, To); |
32735 | } |
32736 | |
32737 | bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { |
32738 | if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0))) |
32739 | return false; |
32740 | |
32741 | EVT SrcVT = ExtVal.getOperand(0).getValueType(); |
32742 | |
32743 | |
32744 | if (SrcVT.getScalarType() == MVT::i1) |
32745 | return false; |
32746 | |
32747 | return true; |
32748 | } |
32749 | |
32750 | bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
32751 | EVT VT) const { |
32752 | if (!Subtarget.hasAnyFMA()) |
32753 | return false; |
32754 | |
32755 | VT = VT.getScalarType(); |
32756 | |
32757 | if (!VT.isSimple()) |
32758 | return false; |
32759 | |
32760 | switch (VT.getSimpleVT().SimpleTy) { |
32761 | case MVT::f16: |
32762 | return Subtarget.hasFP16(); |
32763 | case MVT::f32: |
32764 | case MVT::f64: |
32765 | return true; |
32766 | default: |
32767 | break; |
32768 | } |
32769 | |
32770 | return false; |
32771 | } |
32772 | |
32773 | bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { |
32774 | |
32775 | return !(VT1 == MVT::i32 && VT2 == MVT::i16); |
32776 | } |
32777 | |
32778 | |
32779 | |
32780 | |
32781 | |
32782 | bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const { |
32783 | if (!VT.isSimple()) |
32784 | return false; |
32785 | |
32786 | |
32787 | if (VT.getSimpleVT().getScalarType() == MVT::i1) |
32788 | return false; |
32789 | |
32790 | |
32791 | if (VT.getSimpleVT().getSizeInBits() == 64) |
32792 | return false; |
32793 | |
32794 | |
32795 | |
32796 | return isTypeLegal(VT.getSimpleVT()); |
32797 | } |
32798 | |
32799 | bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask, |
32800 | EVT VT) const { |
32801 | |
32802 | |
32803 | if (!Subtarget.hasAVX2()) |
32804 | if (VT == MVT::v32i8 || VT == MVT::v16i16) |
32805 | return false; |
32806 | |
32807 | |
32808 | return isShuffleMaskLegal(Mask, VT); |
32809 | } |
32810 | |
32811 | bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { |
32812 | |
32813 | if (Subtarget.useIndirectThunkBranches()) |
32814 | return false; |
32815 | |
32816 | |
32817 | return TargetLowering::areJTsAllowed(Fn); |
32818 | } |
32819 | |
32820 | |
32821 | |
32822 | |
32823 | |
32824 | |
32825 | |
32826 | static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, |
32827 | MachineBasicBlock *BB) { |
32828 | |
32829 | for (MachineBasicBlock::iterator miI = std::next(Itr), miE = BB->end(); |
32830 | miI != miE; ++miI) { |
32831 | const MachineInstr& mi = *miI; |
32832 | if (mi.readsRegister(X86::EFLAGS)) |
32833 | return true; |
32834 | |
32835 | if (mi.definesRegister(X86::EFLAGS)) |
32836 | return false; |
32837 | } |
32838 | |
32839 | |
32840 | |
32841 | for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), |
32842 | sEnd = BB->succ_end(); |
32843 | sItr != sEnd; ++sItr) { |
32844 | MachineBasicBlock* succ = *sItr; |
32845 | if (succ->isLiveIn(X86::EFLAGS)) |
32846 | return true; |
32847 | } |
32848 | |
32849 | return false; |
32850 | } |
32851 | |
32852 | |
/// Expand the XBEGIN pseudo-instruction into explicit control flow.
/// The block is split into a transactional "main" path and an abort
/// "fallback" path that merge in a sink block through a PHI:
///
///   thisMBB:
///     xbegin fallMBB       ; on abort, execution resumes at fallMBB
///   mainMBB:
///     mainDstReg = -1      ; success sentinel
///   fallMBB:
///     xabort_def           ; implicit def of the abort status
///     fallDstReg = EAX     ; abort status copied out of EAX
///   sinkMBB:
///     DstReg = phi(mainDstReg/mainMBB, fallDstReg/fallMBB)
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
                                     const TargetInstrInfo *TII) {
  const DebugLoc &DL = MI.getDebugLoc();

  const BasicBlock *BB = MBB->getBasicBlock();
  // Insertion point for the new blocks: right after the current block.
  MachineFunction::iterator I = ++MBB->getIterator();

  MachineBasicBlock *thisMBB = MBB;
  MachineFunction *MF = MBB->getParent();
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, fallMBB);
  MF->insert(I, sinkMBB);

  // If something after MI still reads EFLAGS, the flags must be modeled as
  // live-in through every new block.
  if (isEFLAGSLiveAfter(MI, MBB)) {
    mainMBB->addLiveIn(X86::EFLAGS);
    fallMBB->addLiveIn(X86::EFLAGS);
    sinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Everything after MI moves into the sink block, which also takes over
  // the original block's successors (updating PHIs accordingly).
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register fallDstReg = MRI.createVirtualRegister(RC);

  // thisMBB: the XBEGIN itself, branching to fallMBB on abort.
  BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
  thisMBB->addSuccessor(mainMBB);
  thisMBB->addSuccessor(fallMBB);

  // mainMBB: transaction started successfully; result is -1.
  BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
  BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
  mainMBB->addSuccessor(sinkMBB);

  // fallMBB: abort path. XABORT_DEF models the status definition; the
  // value is then copied out of EAX.
  BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
  BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
      .addReg(X86::EAX);
  fallMBB->addSuccessor(sinkMBB);

  // sinkMBB: merge both results into the pseudo's destination register.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
      .addReg(mainDstReg).addMBB(mainMBB)
      .addReg(fallDstReg).addMBB(fallMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
32933 | |
MachineBasicBlock *
X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const {
  // Expand the VAARG pseudo for the 64-bit va_list layout. Operands:
  //   0: destination address register
  //   1-5: va_list memory reference (base, scale, index, disp, segment)
  //   6: argument size, 7: argument mode, 8: alignment
  // ArgMode selects the register-save area to try first: 1 = GP registers,
  // 2 = FP/XMM registers; anything else goes straight to the overflow area.
  assert(MI.getNumOperands() == 10 && "VAARG should have 10 operands!");
  static_assert(X86::AddrNumOperands == 5, "VAARG assumes 5 address operands");

  Register DestReg = MI.getOperand(0).getReg();
  MachineOperand &Base = MI.getOperand(1);
  MachineOperand &Scale = MI.getOperand(2);
  MachineOperand &Index = MI.getOperand(3);
  MachineOperand &Disp = MI.getOperand(4);
  MachineOperand &Segment = MI.getOperand(5);
  unsigned ArgSize = MI.getOperand(6).getImm();
  unsigned ArgMode = MI.getOperand(7).getImm();
  Align Alignment = Align(MI.getOperand(8).getImm());

  MachineFunction *MF = MBB->getParent();

  assert(MI.hasOneMemOperand() && "Expected VAARG to have one memoperand");

  MachineMemOperand *OldMMO = MI.memoperands().front();

  // Split the pseudo's load+store memoperand so each emitted instruction
  // carries only the access kind it actually performs.
  MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand(
      OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore);
  MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand(
      OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad);

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  const TargetRegisterClass *AddrRegClass =
      getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout()));
  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
  const DebugLoc &DL = MI.getDebugLoc();

  // Register-save-area geometry: 6 GP registers of 8 bytes each, followed
  // (when FP args are used) by 8 XMM registers of 16 bytes each. MaxOffset
  // is the end of the area an argument of this kind may be read from.
  unsigned TotalNumIntRegs = 6;
  unsigned TotalNumXMMRegs = 8;
  bool UseGPOffset = (ArgMode == 1);
  bool UseFPOffset = (ArgMode == 2);
  unsigned MaxOffset = TotalNumIntRegs * 8 +
                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);

  // Argument size rounded up to an 8-byte multiple.
  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
  bool NeedsAlign = (Alignment > 8);

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *overflowMBB;
  MachineBasicBlock *offsetMBB;
  MachineBasicBlock *endMBB;

  unsigned OffsetDestReg = 0;
  unsigned OverflowDestReg = 0;
  unsigned OffsetReg = 0;

  if (!UseGPOffset && !UseFPOffset) {
    // Memory-class argument: it always lives in the overflow area, so no
    // control flow is needed — everything happens in the current block.
    OffsetDestReg = 0;
    OverflowDestReg = DestReg;

    offsetMBB = nullptr;
    overflowMBB = thisMBB;
    endMBB = thisMBB;
  } else {
    // Register-class argument: build a diamond that reads the saved
    // gp_offset/fp_offset, and either loads from the register-save area
    // (offsetMBB) or falls back to the overflow area (overflowMBB).
    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);

    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);

    MachineFunction::iterator MBBIter = ++MBB->getIterator();

    MF->insert(MBBIter, offsetMBB);
    MF->insert(MBBIter, overflowMBB);
    MF->insert(MBBIter, endMBB);

    // Everything after MI moves into the merge block, which also inherits
    // the original successors.
    endMBB->splice(endMBB->begin(), thisMBB,
                   std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());
    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);

    thisMBB->addSuccessor(offsetMBB);
    thisMBB->addSuccessor(overflowMBB);

    offsetMBB->addSuccessor(endMBB);
    overflowMBB->addSuccessor(endMBB);

    // Load gp_offset (disp 0) or fp_offset (disp 4) from the va_list.
    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
        .add(Base)
        .add(Scale)
        .add(Index)
        .addDisp(Disp, UseFPOffset ? 4 : 0)
        .add(Segment)
        .setMemRefs(LoadOnlyMMO);

    // Does the whole (rounded) argument still fit in the save area?
    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
        .addReg(OffsetReg)
        .addImm(MaxOffset + 8 - ArgSizeA8);

    // If not (unsigned offset >= limit), take the overflow path.
    BuildMI(thisMBB, DL, TII->get(X86::JCC_1))
        .addMBB(overflowMBB).addImm(X86::COND_AE);
  }

  // offsetMBB: the argument is in the register-save area.
  if (offsetMBB) {
    assert(OffsetReg != 0);

    // Load reg_save_area (disp 16 on LP64, 12 otherwise).
    Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
    BuildMI(
        offsetMBB, DL,
        TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
        RegSaveReg)
        .add(Base)
        .add(Scale)
        .add(Index)
        .addDisp(Disp, Subtarget.isTarget64BitLP64() ? 16 : 12)
        .add(Segment)
        .setMemRefs(LoadOnlyMMO);

    if (Subtarget.isTarget64BitLP64()) {
      // Zero-extend the 32-bit offset before the 64-bit pointer add.
      Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
      BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
          .addImm(0)
          .addReg(OffsetReg)
          .addImm(X86::sub_32bit);

      // Result address = reg_save_area + offset.
      BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
          .addReg(OffsetReg64)
          .addReg(RegSaveReg);
    } else {
      // 32-bit pointer arithmetic.
      BuildMI(offsetMBB, DL, TII->get(X86::ADD32rr), OffsetDestReg)
          .addReg(OffsetReg)
          .addReg(RegSaveReg);
    }

    // Advance the offset by one slot: 16 bytes for FP/XMM, 8 for GP.
    Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
        .addReg(OffsetReg)
        .addImm(UseFPOffset ? 16 : 8);

    // Store the updated offset back into the va_list.
    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
        .add(Base)
        .add(Scale)
        .add(Index)
        .addDisp(Disp, UseFPOffset ? 4 : 0)
        .add(Segment)
        .addReg(NextOffsetReg)
        .setMemRefs(StoreOnlyMMO);

    // Jump to the merge block.
    BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
        .addMBB(endMBB);
  }

  // overflowMBB (or the straight-line case): the argument lives in the
  // overflow area. Load overflow_arg_area (disp 8) from the va_list.
  Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
  BuildMI(overflowMBB, DL,
          TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
          OverflowAddrReg)
      .add(Base)
      .add(Scale)
      .add(Index)
      .addDisp(Disp, 8)
      .add(Segment)
      .setMemRefs(LoadOnlyMMO);

  // Over-aligned arguments: round the address up to the alignment with
  // (addr + align - 1) & ~(align - 1).
  if (NeedsAlign) {
    Register TmpReg = MRI.createVirtualRegister(AddrRegClass);

    BuildMI(
        overflowMBB, DL,
        TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
        TmpReg)
        .addReg(OverflowAddrReg)
        .addImm(Alignment.value() - 1);

    BuildMI(
        overflowMBB, DL,
        TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri),
        OverflowDestReg)
        .addReg(TmpReg)
        .addImm(~(uint64_t)(Alignment.value() - 1));
  } else {
    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
        .addReg(OverflowAddrReg);
  }

  // Bump overflow_arg_area past this argument (8-byte-rounded size).
  Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
  BuildMI(
      overflowMBB, DL,
      TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
      NextAddrReg)
      .addReg(OverflowDestReg)
      .addImm(ArgSizeA8);

  // Store the advanced pointer back into the va_list.
  BuildMI(overflowMBB, DL,
          TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr))
      .add(Base)
      .add(Scale)
      .add(Index)
      .addDisp(Disp, 8)
      .add(Segment)
      .addReg(NextAddrReg)
      .setMemRefs(StoreOnlyMMO);

  // Merge the two computed addresses when a diamond was built.
  if (offsetMBB) {
    BuildMI(*endMBB, endMBB->begin(), DL,
            TII->get(X86::PHI), DestReg)
        .addReg(OffsetDestReg).addMBB(offsetMBB)
        .addReg(OverflowDestReg).addMBB(overflowMBB);
  }

  // The pseudo is fully expanded.
  MI.eraseFromParent();

  return endMBB;
}
33214 | |
33215 | |
33216 | |
33217 | |
33218 | |
33219 | |
33220 | static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, |
33221 | MachineBasicBlock* BB, |
33222 | const TargetRegisterInfo* TRI) { |
33223 | if (isEFLAGSLiveAfter(SelectItr, BB)) |
33224 | return false; |
33225 | |
33226 | |
33227 | |
33228 | SelectItr->addRegisterKilled(X86::EFLAGS, TRI); |
33229 | return true; |
33230 | } |
33231 | |
33232 | |
33233 | |
33234 | |
33235 | static bool isCMOVPseudo(MachineInstr &MI) { |
33236 | switch (MI.getOpcode()) { |
33237 | case X86::CMOV_FR16X: |
33238 | case X86::CMOV_FR32: |
33239 | case X86::CMOV_FR32X: |
33240 | case X86::CMOV_FR64: |
33241 | case X86::CMOV_FR64X: |
33242 | case X86::CMOV_GR8: |
33243 | case X86::CMOV_GR16: |
33244 | case X86::CMOV_GR32: |
33245 | case X86::CMOV_RFP32: |
33246 | case X86::CMOV_RFP64: |
33247 | case X86::CMOV_RFP80: |
33248 | case X86::CMOV_VR64: |
33249 | case X86::CMOV_VR128: |
33250 | case X86::CMOV_VR128X: |
33251 | case X86::CMOV_VR256: |
33252 | case X86::CMOV_VR256X: |
33253 | case X86::CMOV_VR512: |
33254 | case X86::CMOV_VK1: |
33255 | case X86::CMOV_VK2: |
33256 | case X86::CMOV_VK4: |
33257 | case X86::CMOV_VK8: |
33258 | case X86::CMOV_VK16: |
33259 | case X86::CMOV_VK32: |
33260 | case X86::CMOV_VK64: |
33261 | return true; |
33262 | |
33263 | default: |
33264 | return false; |
33265 | } |
33266 | } |
33267 | |
33268 | |
33269 | |
33270 | |
33271 | |
33272 | |
/// Lower a contiguous run of CMOV pseudos that all test the same condition
/// (or its opposite) by emitting one PHI per CMOV in \p SinkMBB. Returns the
/// builder for the last PHI emitted.
static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
    MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    MachineBasicBlock *SinkMBB) {
  MachineFunction *MF = TrueMBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const DebugLoc &DL = MIItBegin->getDebugLoc();

  // Condition of the first CMOV; later CMOVs in the run that use the
  // opposite condition get their operands swapped below.
  X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm());
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);

  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // A CMOV in the run may consume the result of an earlier CMOV in the same
  // run. Such uses must be redirected to the matching PHI input: the value
  // arriving from FalseMBB (.first) when used as operand 1, or the value
  // arriving from TrueMBB (.second) when used as operand 2.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
  MachineInstrBuilder MIB;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // Normalize opposite-condition CMOVs by swapping their inputs so every
    // PHI lists the FalseMBB value first.
    if (MIIt->getOperand(3).getImm() == OppCC)
      std::swap(Op1Reg, Op2Reg);

    // Rewrite uses of earlier CMOV results to the corresponding PHI input.
    if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
      Op1Reg = RegRewriteTable[Op1Reg].first;

    if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
      Op2Reg = RegRewriteTable[Op2Reg].second;

    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(TrueMBB);

    // Record this PHI's inputs for rewriting later CMOVs in the run.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  return MIB;
}
33324 | |
33325 | |
// Lower a pair of cascaded CMOV pseudos: the caller matched a second CMOV
// whose first (false) operand is the killed result of the first CMOV and
// whose second operand equals the first CMOV's second operand. Both can be
// lowered with a single three-input PHI plus a COPY:
//
//   ThisMBB:
//     jcc FirstCC -> SinkMBB
//   FirstInsertedMBB:
//     jcc SecondCC -> SinkMBB
//   SecondInsertedMBB:
//     (fall through)
//   SinkMBB:
//     %d1 = phi [Op1, SecondInsertedMBB], [Op2, ThisMBB],
//               [Op2, FirstInsertedMBB]
//     %d2 = COPY %d1
//
// Note the FirstInsertedMBB edge reuses the first CMOV's operand 2, and the
// second CMOV's result is simply a copy of the first's.
MachineBasicBlock *
X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
                                             MachineInstr &SecondCascadedCMOV,
                                             MachineBasicBlock *ThisMBB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = FirstCMOV.getDebugLoc();

  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstInsertedMBB);
  F->insert(It, SecondInsertedMBB);
  F->insert(It, SinkMBB);

  // FirstInsertedMBB branches on EFLAGS, so the flags are unconditionally
  // live into it.
  FirstInsertedMBB->addLiveIn(X86::EFLAGS);

  // If EFLAGS is still needed after the second CMOV (and cannot simply be
  // marked killed on it), model the liveness through the other new blocks.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  if (!SecondCascadedCMOV.killsRegister(X86::EFLAGS) &&
      !checkAndUpdateEFLAGSKill(SecondCascadedCMOV, ThisMBB, TRI)) {
    SecondInsertedMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Everything after the first CMOV (including the second CMOV) moves into
  // the sink block, which also inherits ThisMBB's successors.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(FirstCMOV)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Wire up the CFG edges of the two nested triangles.
  ThisMBB->addSuccessor(FirstInsertedMBB);

  ThisMBB->addSuccessor(SinkMBB);

  FirstInsertedMBB->addSuccessor(SecondInsertedMBB);

  FirstInsertedMBB->addSuccessor(SinkMBB);

  SecondInsertedMBB->addSuccessor(SinkMBB);

  // First conditional branch: skip straight to the sink when FirstCC holds.
  X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm());
  BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);

  // Second conditional branch, in the first inserted block.
  X86::CondCode SecondCC =
      X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm());
  BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC);

  // Build the three-input PHI for the first CMOV's destination.
  Register DestReg = FirstCMOV.getOperand(0).getReg();
  Register Op1Reg = FirstCMOV.getOperand(1).getReg();
  Register Op2Reg = FirstCMOV.getOperand(2).getReg();
  MachineInstrBuilder MIB =
      BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg)
          .addReg(Op1Reg)
          .addMBB(SecondInsertedMBB)
          .addReg(Op2Reg)
          .addMBB(ThisMBB);

  // Third PHI input: the shared second operand, arriving from the
  // first inserted block.
  MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB);

  // The second CMOV's result is identical to the first's — just copy it.
  BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), DL,
          TII->get(TargetOpcode::COPY),
          SecondCascadedCMOV.getOperand(0).getReg())
      .addReg(FirstCMOV.getOperand(0).getReg());

  // Both pseudos are fully lowered now.
  FirstCMOV.eraseFromParent();
  SecondCascadedCMOV.eraseFromParent();

  return SinkMBB;
}
33481 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
                                     MachineBasicBlock *ThisMBB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();

  // Expand a CMOV-style select pseudo into a triangle:
  //
  //   ThisMBB:
  //     jcc CC -> SinkMBB          ; condition true selects operand 2
  //   FalseMBB:
  //     (fall through)             ; condition false selects operand 1
  //   SinkMBB:
  //     dest = phi [op1, FalseMBB], [op2, ThisMBB]
  //
  // A run of consecutive CMOV pseudos all testing CC or its opposite is
  // folded into a single branch with one PHI per CMOV
  // (createPHIsForCMOVsInSinkBB handles the operand rewriting).

  X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm());
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  MachineInstr *LastCMOV = &MI;
  MachineBasicBlock::iterator NextMIIt = MachineBasicBlock::iterator(MI);

  // Walk forward from MI collecting the longest run of CMOV pseudos that
  // use CC or OppCC; debug instructions are skipped via next_nodbg. The
  // first loop iteration visits MI itself.
  if (isCMOVPseudo(MI)) {
    while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) &&
           (NextMIIt->getOperand(3).getImm() == CC ||
            NextMIIt->getOperand(3).getImm() == OppCC)) {
      LastCMOV = &*NextMIIt;
      NextMIIt = next_nodbg(NextMIIt, ThisMBB->end());
    }
  }

  // Cascaded pattern: a lone CMOV whose killed result feeds the next
  // same-opcode CMOV as operand 1, with a shared operand 2. That pair gets
  // its own, cheaper lowering.
  if (LastCMOV == &MI && NextMIIt != ThisMBB->end() &&
      NextMIIt->getOpcode() == MI.getOpcode() &&
      NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() &&
      NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() &&
      NextMIIt->getOperand(1).isKill()) {
    return EmitLoweredCascadedSelect(MI, *NextMIIt, ThisMBB);
  }

  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If EFLAGS survives past the last CMOV (and cannot simply be marked
  // killed there), it must be live-in to the new blocks.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  if (!LastCMOV->killsRegister(X86::EFLAGS) &&
      !checkAndUpdateEFLAGSKill(LastCMOV, ThisMBB, TRI)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Move debug instructions interleaved with the CMOV run into the sink
  // block so they survive the erase of the range below.
  auto DbgEnd = MachineBasicBlock::iterator(LastCMOV);
  auto DbgIt = MachineBasicBlock::iterator(MI);
  while (DbgIt != DbgEnd) {
    auto Next = std::next(DbgIt);
    if (DbgIt->isDebugInstr())
      SinkMBB->push_back(DbgIt->removeFromParent());
    DbgIt = Next;
  }

  // Everything after the CMOV run moves into the sink block, which also
  // inherits ThisMBB's successors.
  SinkMBB->splice(SinkMBB->end(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Wire the triangle: ThisMBB -> {FalseMBB, SinkMBB}, FalseMBB -> SinkMBB.
  ThisMBB->addSuccessor(FalseMBB);

  ThisMBB->addSuccessor(SinkMBB);

  FalseMBB->addSuccessor(SinkMBB);

  // Branch straight to the sink when the condition holds.
  BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  // Emit one PHI per CMOV in the run.
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, ThisMBB, FalseMBB, SinkMBB);

  // The pseudos are fully lowered; remove the whole range.
  ThisMBB->erase(MIItBegin, MIItEnd);

  return SinkMBB;
}
33622 | |
33623 | static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { |
33624 | if (IsLP64) { |
33625 | if (isInt<8>(Imm)) |
33626 | return X86::SUB64ri8; |
33627 | return X86::SUB64ri32; |
33628 | } else { |
33629 | if (isInt<8>(Imm)) |
33630 | return X86::SUB32ri8; |
33631 | return X86::SUB32ri; |
33632 | } |
33633 | } |
33634 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const {
  // Expand a probed dynamic stack allocation: compute the final stack
  // pointer, then lower SP towards it in ProbeSize steps, touching the
  // stack once per step so no span larger than ProbeSize goes unprobed.
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
  const DebugLoc &DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();

  const unsigned ProbeSize = getStackProbeSize(*MF);

  MachineRegisterInfo &MRI = MF->getRegInfo();
  // testMBB: loop header (compare SP against target);
  // blockMBB: loop body (probe + decrement); tailMBB: after the loop.
  MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB->getIterator();
  MF->insert(MBBIter, testMBB);
  MF->insert(MBBIter, blockMBB);
  MF->insert(MBBIter, tailMBB);

  // Requested allocation size (operand 1 of the pseudo).
  Register sizeVReg = MI.getOperand(1).getReg();

  Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;

  Register TmpStackPtr = MRI.createVirtualRegister(
      TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
  Register FinalStackPtr = MRI.createVirtualRegister(
      TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);

  // FinalStackPtr = current SP - size: where SP must end up.
  BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr)
      .addReg(physSPReg);
  {
    const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr;
    BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr)
        .addReg(TmpStackPtr)
        .addReg(sizeVReg);
  }

  // Loop test: exit once SP has been lowered to (or past) the target.
  BuildMI(testMBB, DL,
          TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(FinalStackPtr)
      .addReg(physSPReg);

  BuildMI(testMBB, DL, TII->get(X86::JCC_1))
      .addMBB(tailMBB)
      .addImm(X86::COND_GE);
  testMBB->addSuccessor(blockMBB);
  testMBB->addSuccessor(tailMBB);

  // Loop body: touch the word at the current SP (a read-modify-write XOR
  // with 0 leaves memory unchanged but forces the page in), then move SP
  // down by one probe interval.
  const unsigned XORMIOpc =
      TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
  addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
      .addImm(0);

  BuildMI(blockMBB, DL,
          TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg)
      .addReg(physSPReg)
      .addImm(ProbeSize);

  // Back to the loop test.
  BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
  blockMBB->addSuccessor(testMBB);

  // After the loop: the pseudo's result is the final stack pointer.
  BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
      .addReg(FinalStackPtr);

  // Everything after MI moves into the tail block, which also inherits the
  // original successors.
  tailMBB->splice(tailMBB->end(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  tailMBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(testMBB);

  // The pseudo is fully expanded.
  MI.eraseFromParent();

  return tailMBB;
}
33728 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  // Segmented-stack dynamic allocation: if bumping SP by the requested size
  // would cross the TLS-held stack limit, call the runtime helper
  // __morestack_allocate_stack_space instead of bumping SP directly.
  MachineFunction *MF = BB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  assert(MF->shouldSplitStack());

  const bool Is64Bit = Subtarget.is64Bit();
  const bool IsLP64 = Subtarget.isTarget64BitLP64();

  // The stack limit lives at a fixed offset in the thread segment:
  // fs:0x70 (LP64), fs:0x40 (64-bit ILP32), or gs:0x30 (32-bit).
  const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
  const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;

  // CFG being built:
  //   BB:          SPLimit = SP - size; branch to mallocMBB if below limit
  //   bumpMBB:     SP = SPLimit (fast path, in-segment bump)
  //   mallocMBB:   call __morestack_allocate_stack_space(size)
  //   continueMBB: result = phi(mallocPtr/mallocMBB, bumpSPPtr/bumpMBB)
  MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetRegisterClass *AddrRegClass =
      getRegClassFor(getPointerTy(MF->getDataLayout()));

  Register mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
           bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
           tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
           SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
           sizeVReg = MI.getOperand(1).getReg(),
           physSPReg =
               IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;

  MachineFunction::iterator MBBIter = ++BB->getIterator();

  MF->insert(MBBIter, bumpMBB);
  MF->insert(MBBIter, mallocMBB);
  MF->insert(MBBIter, continueMBB);

  // Everything after MI moves into continueMBB, which also inherits BB's
  // successors.
  continueMBB->splice(continueMBB->begin(), BB,
                      std::next(MachineBasicBlock::iterator(MI)), BB->end());
  continueMBB->transferSuccessorsAndUpdatePHIs(BB);

  // BB: compute the would-be SP and compare it with the TLS stack limit.
  BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
  BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
    .addReg(tmpSPVReg).addReg(sizeVReg);
  // cmp [TlsReg:TlsOffset], SPLimitVReg
  BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
    .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
    .addReg(SPLimitVReg);
  BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);

  // bumpMBB: allocation fits — just lower SP and return the new pointer.
  BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
    .addReg(SPLimitVReg);
  BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
    .addReg(SPLimitVReg);
  BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);

  // mallocMBB: call the runtime to allocate more stack, passing the size
  // per the C calling convention for the target.
  const uint32_t *RegMask =
      Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
  if (IsLP64) {
    BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
      .addReg(sizeVReg);
    BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
      .addExternalSymbol("__morestack_allocate_stack_space")
      .addRegMask(RegMask)
      .addReg(X86::RDI, RegState::Implicit)
      .addReg(X86::RAX, RegState::ImplicitDefine);
  } else if (Is64Bit) {
    BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
      .addReg(sizeVReg);
    BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
      .addExternalSymbol("__morestack_allocate_stack_space")
      .addRegMask(RegMask)
      .addReg(X86::EDI, RegState::Implicit)
      .addReg(X86::EAX, RegState::ImplicitDefine);
  } else {
    // 32-bit: pad SP by 12 so the pushed argument keeps 16-byte alignment.
    BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
      .addImm(12);
    BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
    BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
      .addExternalSymbol("__morestack_allocate_stack_space")
      .addRegMask(RegMask)
      .addReg(X86::EAX, RegState::ImplicitDefine);
  }

  // 32-bit: pop the 16 bytes (pad + argument) back off the stack.
  if (!Is64Bit)
    BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
      .addImm(16);

  // The helper returns the allocated pointer in RAX/EAX.
  BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
    .addReg(IsLP64 ? X86::RAX : X86::EAX);
  BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);

  // CFG edges for the diamond.
  BB->addSuccessor(bumpMBB);
  BB->addSuccessor(mallocMBB);
  mallocMBB->addSuccessor(continueMBB);
  bumpMBB->addSuccessor(continueMBB);

  // Merge the two pointers into the pseudo's destination register.
  BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
          MI.getOperand(0).getReg())
      .addReg(mallocPtrVReg)
      .addMBB(mallocMBB)
      .addReg(bumpSPPtrVReg)
      .addMBB(bumpMBB);

  // The pseudo is fully expanded.
  MI.eraseFromParent();

  return continueMBB;
}
33862 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
                                       MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
  const DebugLoc &DL = MI.getDebugLoc();

  assert(!isAsynchronousEHPersonality(
             classifyEHPersonality(MF->getFunction().getPersonalityFn())) &&
         "SEH does not use catchret!");

  // Only 32-bit targets need the extra restore block inserted below.
  if (!Subtarget.is32Bit())
    return BB;

  // Insert a trampoline block between the catchret and its target: the
  // catchret is redirected to RestoreMBB, which then jumps to the original
  // target. RestoreMBB takes over BB's successors.
  MachineBasicBlock *RestoreMBB =
      MF->CreateMachineBasicBlock(BB->getBasicBlock());
  assert(BB->succ_size() == 1);
  MF->insert(std::next(BB->getIterator()), RestoreMBB);
  RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(RestoreMBB);
  MI.getOperand(0).setMBB(RestoreMBB);

  // Mark the trampoline as an EH pad. NOTE(review): presumably this keeps
  // later passes from folding it back into the catchret's block — confirm
  // against the pass that consumes the flag.
  RestoreMBB->setIsEHPad(true);

  auto RestoreMBBI = RestoreMBB->begin();
  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
  return BB;
}
33897 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
                                      MachineBasicBlock *BB) const {
  // Bracket the TLS-address pseudo (which expands to a call) with
  // call-frame setup/destroy pseudos so the call-frame optimizer treats it
  // like any other call site. The instruction itself is left in place.
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction &MF = *BB->getParent();

  // Insert CALLSEQ_START (zero stack adjustment) immediately before MI.
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  MachineInstrBuilder CallseqStart =
      BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
  BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);

  // Insert the matching CALLSEQ_END immediately after MI.
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  MachineInstrBuilder CallseqEnd =
      BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
  BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);

  return BB;
}
33926 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
                                      MachineBasicBlock *BB) const {
  // Lower the Darwin TLS-call pseudo: load the thread-local descriptor
  // address (operand 3 is the global) into RDI/EAX, then make an indirect
  // call through it. Three variants: 64-bit RIP-relative, 32-bit absolute,
  // and 32-bit PIC via the global base register.
  MachineFunction *F = BB->getParent();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();

  assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
  assert(MI.getOperand(3).isGlobal() && "This should be a global");

  // Register mask for the TLS call: a dedicated Darwin TLS-call mask on
  // 64-bit, otherwise the ordinary C calling-convention preserved mask.
  const uint32_t *RegMask =
      Subtarget.is64Bit() ?
      Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
      Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
  if (Subtarget.is64Bit()) {
    // 64-bit: RIP-relative load of the TLV descriptor into RDI, then
    // CALL64m through [RDI]. RAX is implicitly defined as the result.
    MachineInstrBuilder MIB =
        BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI)
            .addReg(X86::RIP)
            .addImm(0)
            .addReg(0)
            .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
                              MI.getOperand(3).getTargetFlags())
            .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
    addDirectMem(MIB, X86::RDI);
    MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
  } else if (!isPositionIndependent()) {
    // 32-bit non-PIC: absolute-address load into EAX, then CALL32m
    // through [EAX].
    MachineInstrBuilder MIB =
        BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
            .addReg(0)
            .addImm(0)
            .addReg(0)
            .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
                              MI.getOperand(3).getTargetFlags())
            .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
    addDirectMem(MIB, X86::EAX);
    MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
  } else {
    // 32-bit PIC: address the descriptor relative to the global base
    // register, then CALL32m through [EAX].
    MachineInstrBuilder MIB =
        BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
            .addReg(TII->getGlobalBaseReg(F))
            .addImm(0)
            .addReg(0)
            .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
                              MI.getOperand(3).getTargetFlags())
            .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
    addDirectMem(MIB, X86::EAX);
    MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
  }

  // The pseudo has been fully expanded; remove it.
  MI.eraseFromParent();
  return BB;
}
33989 | |
33990 | static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) { |
33991 | switch (RPOpc) { |
33992 | case X86::INDIRECT_THUNK_CALL32: |
33993 | return X86::CALLpcrel32; |
33994 | case X86::INDIRECT_THUNK_CALL64: |
33995 | return X86::CALL64pcrel32; |
33996 | case X86::INDIRECT_THUNK_TCRETURN32: |
33997 | return X86::TCRETURNdi; |
33998 | case X86::INDIRECT_THUNK_TCRETURN64: |
33999 | return X86::TCRETURNdi64; |
34000 | } |
34001 | llvm_unreachable("not indirect thunk opcode"); |
34002 | } |
34003 | |
34004 | static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget, |
34005 | unsigned Reg) { |
34006 | if (Subtarget.useRetpolineExternalThunk()) { |
34007 | |
34008 | |
34009 | |
34010 | |
34011 | |
34012 | |
34013 | |
34014 | |
34015 | |
34016 | |
34017 | |
34018 | |
34019 | |
34020 | switch (Reg) { |
34021 | case X86::EAX: |
34022 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34023 | return "__x86_indirect_thunk_eax"; |
34024 | case X86::ECX: |
34025 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34026 | return "__x86_indirect_thunk_ecx"; |
34027 | case X86::EDX: |
34028 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34029 | return "__x86_indirect_thunk_edx"; |
34030 | case X86::EDI: |
34031 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34032 | return "__x86_indirect_thunk_edi"; |
34033 | case X86::R11: |
34034 | assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); |
34035 | return "__x86_indirect_thunk_r11"; |
34036 | } |
34037 | llvm_unreachable("unexpected reg for external indirect thunk"); |
34038 | } |
34039 | |
34040 | if (Subtarget.useRetpolineIndirectCalls() || |
34041 | Subtarget.useRetpolineIndirectBranches()) { |
34042 | |
34043 | switch (Reg) { |
34044 | case X86::EAX: |
34045 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34046 | return "__llvm_retpoline_eax"; |
34047 | case X86::ECX: |
34048 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34049 | return "__llvm_retpoline_ecx"; |
34050 | case X86::EDX: |
34051 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34052 | return "__llvm_retpoline_edx"; |
34053 | case X86::EDI: |
34054 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
34055 | return "__llvm_retpoline_edi"; |
34056 | case X86::R11: |
34057 | assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); |
34058 | return "__llvm_retpoline_r11"; |
34059 | } |
34060 | llvm_unreachable("unexpected reg for retpoline"); |
34061 | } |
34062 | |
34063 | if (Subtarget.useLVIControlFlowIntegrity()) { |
34064 | assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); |
34065 | return "__llvm_lvi_thunk_r11"; |
34066 | } |
34067 | llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature"); |
34068 | } |
34069 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
                                            MachineBasicBlock *BB) const {
  // Rewrite an INDIRECT_THUNK_* pseudo into: a copy of the callee into a
  // scratch register the thunk reads, plus a direct call/tail-call to the
  // thunk symbol.
  const DebugLoc &DL = MI.getDebugLoc();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();
  // Operand 0 of the pseudo is the virtual register holding the callee.
  Register CalleeVReg = MI.getOperand(0).getReg();
  unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());

  // Candidate scratch registers: r11 on 64-bit; on 32-bit, the registers
  // for which thunks exist (eax/ecx/edx/edi), tried in order.
  SmallVector<unsigned, 3> AvailableRegs;
  if (Subtarget.is64Bit())
    AvailableRegs.push_back(X86::R11);
  else
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});

  // Knock out any candidate the call already uses (e.g. as an argument
  // register) by zeroing its slot.
  for (const auto &MO : MI.operands()) {
    if (MO.isReg() && MO.isUse())
      for (unsigned &Reg : AvailableRegs)
        if (Reg == MO.getReg())
          Reg = 0;
  }

  // Pick the first surviving candidate.
  unsigned AvailableReg = 0;
  for (unsigned MaybeReg : AvailableRegs) {
    if (MaybeReg) {
      AvailableReg = MaybeReg;
      break;
    }
  }
  if (!AvailableReg)
    report_fatal_error("calling convention incompatible with retpoline, no "
                       "available registers");

  const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);

  // Materialize the callee in the scratch register, then mutate MI in
  // place: callee operand becomes the thunk symbol, the opcode becomes a
  // direct call, and the scratch register is added as an implicit killed
  // use so the thunk sees it live.
  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
      .addReg(CalleeVReg);
  MI.getOperand(0).ChangeToES(Symbol);
  MI.setDesc(TII->get(Opc));
  MachineInstrBuilder(*BB->getParent(), &MI)
      .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
  return BB;
}
34122 | |
34123 | |
34124 | |
34125 | |
34126 | |
34127 | |
34128 | |
34129 | |
34130 | |
34131 | |
34132 | |
34133 | |
34134 | |
// Emit the extra setjmp-side bookkeeping needed when CET shadow stacks are
// in use: read the current shadow-stack pointer and store it into the
// setjmp buffer so longjmp can later unwind the shadow stack to match.
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB;

  // Memory operands of the original setjmp pseudo, reused for the store.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  // Zero a pointer-sized register (XOR reg,reg with undef inputs) to feed
  // RDSSP, which requires a zeroed destination.
  MVT PVT = getPointerTy(MF->getDataLayout());
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register ZReg = MRI.createVirtualRegister(PtrRC);
  unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
  BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
      .addDef(ZReg)
      .addReg(ZReg, RegState::Undef)
      .addReg(ZReg, RegState::Undef);

  // Read the current shadow-stack pointer.
  Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
  BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);

  // Store it into the buffer at slot 3 (offset 3 * pointer size). The
  // address operands start at MI operand 1; only the displacement is
  // adjusted by SSPOffset.
  unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
  const int64_t SSPOffset = 3 * PVT.getStoreSize();
  const unsigned MemOpndSlot = 1;
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    if (i == X86::AddrDisp)
      MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset);
    else
      MIB.add(MI.getOperand(MemOpndSlot + i));
  }
  MIB.addReg(SSPCopyReg);
  MIB.setMemRefs(MMOs);
}
34176 | |
// Lower the EH setjmp pseudo into a small CFG:
//
//   thisMBB:  store the resume label (and, with CET, the shadow-stack
//             pointer) into the buffer, then EH_SjLj_Setup restoreMBB
//   mainMBB:  normal fall-through path, result = 0
//   restoreMBB: longjmp target, result = 1, optional base-pointer reload
//   sinkMBB:  PHI merging 0/1 into the original destination register
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  // Insertion point for the new blocks: right after MBB.
  MachineFunction::iterator I = ++MBB->getIterator();

  // Keep the pseudo's memory operands for the buffer store below.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  unsigned DstReg;
  unsigned MemOpndSlot = 0;

  unsigned CurOp = 0;

  // Operand 0 is the i32 result register; the buffer address operands
  // follow it.
  DstReg = MI.getOperand(CurOp++).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  (void)TRI;
  // Per-path result values, merged by the PHI in sinkMBB.
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MemOpndSlot = CurOp;

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  // Create the blocks. restoreMBB is appended at the end of the function
  // (not inline) and has its address taken since longjmp jumps to it.
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);
  MF->push_back(restoreMBB);
  restoreMBB->setHasAddressTaken();

  MachineInstrBuilder MIB;

  // Everything after MI moves to sinkMBB, which also takes over MBB's
  // successors (updating PHIs).
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // thisMBB:
  // Decide how the resume label is stored: a small, non-PIC code model can
  // store the block address as an immediate; otherwise materialize it with
  // LEA first.
  unsigned PtrStoreOpc = 0;
  unsigned LabelReg = 0;
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
                     !isPositionIndependent();

  if (!UseImmLabel) {
    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
    const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
    LabelReg = MRI.createVirtualRegister(PtrRC);
    if (Subtarget.is64Bit()) {
      // RIP-relative LEA of the restore block.
      MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
                .addReg(X86::RIP)
                .addImm(0)
                .addReg(0)
                .addMBB(restoreMBB)
                .addReg(0);
    } else {
      // 32-bit: address the block relative to the global base register.
      const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
      MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
                .addReg(XII->getGlobalBaseReg(MF))
                .addImm(0)
                .addReg(0)
                .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
                .addReg(0);
    }
  } else
    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;

  // Store the label into the buffer at slot 1 (offset 1 * pointer size).
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    if (i == X86::AddrDisp)
      MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
    else
      MIB.add(MI.getOperand(MemOpndSlot + i));
  }
  if (!UseImmLabel)
    MIB.addReg(LabelReg);
  else
    MIB.addMBB(restoreMBB);
  MIB.setMemRefs(MMOs);

  // With CET shadow stacks enabled, also save the shadow-stack pointer.
  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
    emitSetJmpShadowStackFix(MI, thisMBB);
  }

  // Emit the setup pseudo pointing at the restore block; no registers are
  // preserved across it (NoPreservedMask).
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
          .addMBB(restoreMBB);

  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MIB.addRegMask(RegInfo->getNoPreservedMask());
  thisMBB->addSuccessor(mainMBB);
  thisMBB->addSuccessor(restoreMBB);

  // mainMBB: direct-return path yields 0.
  BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB: merge the two path results into the original destination.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(X86::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(restoreMBB);

  // restoreMBB: if the function uses a base pointer, reload it from its
  // frame slot first (longjmp arrives with arbitrary register state).
  if (RegInfo->hasBasePointer(*MF)) {
    const bool Uses64BitFramePtr =
        Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
    X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
    X86FI->setRestoreBasePointer(MF);
    Register FramePtr = RegInfo->getFrameRegister(*MF);
    Register BasePtr = RegInfo->getBaseRegister();
    unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
    addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
                 FramePtr, true, X86FI->getRestoreBasePointerOffset())
      .setMIFlag(MachineInstr::FrameSetup);
  }
  // longjmp path yields 1, then jump to the merge block.
  BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
  BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
  restoreMBB->addSuccessor(sinkMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
34331 | |
34332 | |
34333 | |
34334 | |
34335 | |
34336 | |
// Emit the longjmp-side CET shadow-stack repair before the actual longjmp.
// The shadow stack must be unwound to the depth saved by setjmp; INCSSP can
// only pop up to 255 entries at a time, so large differences are handled by
// a loop that pops 128 entries per iteration. CFG produced:
//
//   MBB -> checkSspMBB -> fallMBB -> fixShadowMBB -> fixShadowLoopPrepareMBB
//            |               |            |                    |
//            +---------------+------------+----> sinkMBB <-- fixShadowLoopMBB
//                                                               (self-loop)
MachineBasicBlock *
X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory operands of the longjmp pseudo, reused for the buffer load.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  MVT PVT = getPointerTy(MF->getDataLayout());
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);

  // Create and insert the six blocks right after MBB.
  MachineFunction::iterator I = ++MBB->getIterator();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, checkSspMBB);
  MF->insert(I, fallMBB);
  MF->insert(I, fixShadowMBB);
  MF->insert(I, fixShadowLoopPrepareMBB);
  MF->insert(I, fixShadowLoopMBB);
  MF->insert(I, sinkMBB);

  // MI itself and everything after it move to sinkMBB; the caller emits
  // the real longjmp sequence there.
  sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI),
                  MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  MBB->addSuccessor(checkSspMBB);

  // checkSspMBB: read the current SSP; if it is zero, shadow stacks are
  // not active, so skip straight to the longjmp.
  Register ZReg = MRI.createVirtualRegister(&X86::GR32RegClass);
  BuildMI(checkSspMBB, DL, TII->get(X86::MOV32r0), ZReg);

  // Widen the zero to 64 bits when pointers are 64-bit.
  if (PVT == MVT::i64) {
    Register TmpZReg = MRI.createVirtualRegister(PtrRC);
    BuildMI(checkSspMBB, DL, TII->get(X86::SUBREG_TO_REG), TmpZReg)
        .addImm(0)
        .addReg(ZReg)
        .addImm(X86::sub_32bit);
    ZReg = TmpZReg;
  }

  // Read the current shadow-stack pointer.
  Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
  BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);

  // SSP == 0 => shadow stack disabled; jump to sinkMBB.
  unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
  BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
      .addReg(SSPCopyReg)
      .addReg(SSPCopyReg);
  BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
  checkSspMBB->addSuccessor(sinkMBB);
  checkSspMBB->addSuccessor(fallMBB);

  // fallMBB: load the SSP that setjmp saved (buffer slot 3) and compute
  // saved - current.
  Register PrevSSPReg = MRI.createVirtualRegister(PtrRC);
  unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
  const int64_t SPPOffset = 3 * PVT.getStoreSize();
  MachineInstrBuilder MIB =
      BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (i == X86::AddrDisp)
      MIB.addDisp(MO, SPPOffset);
    else if (MO.isReg())
      // Re-add register operands without flags (e.g. kill) since the
      // originals are reused again by the load in sinkMBB.
      MIB.addReg(MO.getReg());
    else
      MIB.add(MO);
  }
  MIB.setMemRefs(MMOs);

  // saved - current; also sets flags for the branch below.
  Register SspSubReg = MRI.createVirtualRegister(PtrRC);
  unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
  BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
      .addReg(PrevSSPReg)
      .addReg(SSPCopyReg);

  // saved <= current (unsigned) => nothing to pop; go to sinkMBB.
  BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE);
  fallMBB->addSuccessor(sinkMBB);
  fallMBB->addSuccessor(fixShadowMBB);

  // fixShadowMBB: convert the byte difference into an entry count
  // (shift by 3 for 8-byte entries, 2 for 4-byte entries) ...
  unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
  unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
  Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
      .addReg(SspSubReg)
      .addImm(Offset);

  // ... pop the low 8 bits' worth of entries in one INCSSP (it uses only
  // the low byte of its operand) ...
  unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
  BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);

  // ... and keep the remaining count / 256.
  Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
      .addReg(SspFirstShrReg)
      .addImm(8);

  // Remaining count zero (flags from the shift) => done.
  BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
  fixShadowMBB->addSuccessor(sinkMBB);
  fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);

  // fixShadowLoopPrepareMBB: loop trip count = remaining >> 8 doubled
  // (shift-left by 1), and the per-iteration pop amount of 128.
  unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1;
  Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
      .addReg(SspSecondShrReg);

  Register Value128InReg = MRI.createVirtualRegister(PtrRC);
  unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
  BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
      .addImm(128);
  fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);

  // fixShadowLoopMBB: counter PHI (initial value from the prepare block,
  // decremented value from the loop's own back edge).
  Register DecReg = MRI.createVirtualRegister(PtrRC);
  Register CounterReg = MRI.createVirtualRegister(PtrRC);
  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
      .addReg(SspAfterShlReg)
      .addMBB(fixShadowLoopPrepareMBB)
      .addReg(DecReg)
      .addMBB(fixShadowLoopMBB);

  // Pop 128 shadow-stack entries per iteration.
  BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg);

  // Decrement the counter; DEC sets ZF for the branch.
  unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
  BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);

  // Loop while the counter is non-zero; otherwise fall through to sinkMBB.
  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE);
  fixShadowLoopMBB->addSuccessor(sinkMBB);
  fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);

  return sinkMBB;
}
34517 | |
// Lower the EH longjmp pseudo: (optionally fix the CET shadow stack, then)
// reload FP, the resume label, and SP from the setjmp buffer and jump to
// the label. Buffer layout (pointer-sized slots): 0 = FP, 1 = label,
// 2 = SP (slot 3, the SSP, is handled by emitLongJmpShadowStackFix).
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory operands of the pseudo, attached to each buffer load.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
  // Virtual register for the resume label (jumped through at the end).
  Register Tmp = MRI.createVirtualRegister(RC);

  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  Register FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
  Register SP = RegInfo->getStackRegister();

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();

  unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
  unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;

  MachineBasicBlock *thisMBB = MBB;

  // With CET shadow stacks, unwind the shadow stack first; this splits the
  // block and returns the one the longjmp sequence must go into.
  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
    thisMBB = emitLongJmpShadowStackFix(MI, thisMBB);
  }

  // Reload the frame pointer from buffer slot 0.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg())
      // Re-add register operands without flags (e.g. kill): the same
      // operands are reused by the two loads that follow.
      MIB.addReg(MO.getReg());
    else
      MIB.add(MO);
  }
  MIB.setMemRefs(MMOs);

  // Reload the resume label from buffer slot 1.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (i == X86::AddrDisp)
      MIB.addDisp(MO, LabelOffset);
    else if (MO.isReg())
      // Same flag-stripping as above.
      MIB.addReg(MO.getReg());
    else
      MIB.add(MO);
  }
  MIB.setMemRefs(MMOs);

  // Reload the stack pointer from buffer slot 2 (last use of the address
  // operands, so original flags are kept here).
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    if (i == X86::AddrDisp)
      MIB.addDisp(MI.getOperand(i), SPOffset);
    else
      MIB.add(MI.getOperand(i));

  }
  MIB.setMemRefs(MMOs);

  // Indirect jump to the saved label.
  BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);

  MI.eraseFromParent();
  return thisMBB;
}
34600 | |
// Store the address of the SjLj dispatch block into the function-context
// frame object (frame index FI) in the entry block, so the runtime can
// transfer control to the dispatcher. The fixed offsets 56 (64-bit) / 36
// (32-bit) address a specific field of the context object — presumably the
// call-site/landing-pad slot defined by the SjLj EH runtime ABI; confirm
// against the SjLj EH lowering docs.
void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
                                               MachineBasicBlock *MBB,
                                               MachineBasicBlock *DispatchBB,
                                               int FI) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");

  unsigned Op = 0;   // store opcode (reg- or imm-form)
  unsigned VR = 0;   // virtual register holding the address (reg-form only)

  // Small, non-PIC code model: the block address fits a store-immediate.
  bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
                     !isPositionIndependent();

  if (UseImmLabel) {
    Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
  } else {
    // Otherwise materialize the dispatch block's address with LEA:
    // RIP-relative on 64-bit, base-register-relative classification on
    // 32-bit.
    const TargetRegisterClass *TRC =
        (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
    VR = MRI->createVirtualRegister(TRC);
    Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;

    if (Subtarget.is64Bit())
      BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
          .addReg(X86::RIP)
          .addImm(1)
          .addReg(0)
          .addMBB(DispatchBB)
          .addReg(0);
    else
      BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
          .addReg(0)
          .addImm(1)
          .addReg(0)
          .addMBB(DispatchBB, Subtarget.classifyBlockAddressReference())
          .addReg(0);
  }

  // Store the address (immediate or register) into the context object.
  MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op));
  addFrameReference(MIB, FI, Subtarget.is64Bit() ? 56 : 36);
  if (UseImmLabel)
    MIB.addMBB(DispatchBB);
  else
    MIB.addReg(VR);
}
34650 | |
34651 | MachineBasicBlock * |
34652 | X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, |
34653 | MachineBasicBlock *BB) const { |
34654 | const DebugLoc &DL = MI.getDebugLoc(); |
34655 | MachineFunction *MF = BB->getParent(); |
34656 | MachineRegisterInfo *MRI = &MF->getRegInfo(); |
34657 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
34658 | int FI = MF->getFrameInfo().getFunctionContextIndex(); |
34659 | |
34660 | |
34661 | |
34662 | DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad; |
34663 | unsigned MaxCSNum = 0; |
34664 | for (auto &MBB : *MF) { |
34665 | if (!MBB.isEHPad()) |
34666 | continue; |
34667 | |
34668 | MCSymbol *Sym = nullptr; |
34669 | for (const auto &MI : MBB) { |
34670 | if (MI.isDebugInstr()) |
34671 | continue; |
34672 | |
34673 | assert(MI.isEHLabel() && "expected EH_LABEL"); |
34674 | Sym = MI.getOperand(0).getMCSymbol(); |
34675 | break; |
34676 | } |
34677 | |
34678 | if (!MF->hasCallSiteLandingPad(Sym)) |
34679 | continue; |
34680 | |
34681 | for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { |
34682 | CallSiteNumToLPad[CSI].push_back(&MBB); |
34683 | MaxCSNum = std::max(MaxCSNum, CSI); |
34684 | } |
34685 | } |
34686 | |
34687 | |
34688 | std::vector<MachineBasicBlock *> LPadList; |
34689 | SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs; |
34690 | LPadList.reserve(CallSiteNumToLPad.size()); |
34691 | |
34692 | for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) { |
34693 | for (auto &LP : CallSiteNumToLPad[CSI]) { |
34694 | LPadList.push_back(LP); |
34695 | InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); |
34696 | } |
34697 | } |
34698 | |
34699 | assert(!LPadList.empty() && |
34700 | "No landing pad destinations for the dispatch jump table!"); |
34701 | |
34702 | |
34703 | |
34704 | |
34705 | MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); |
34706 | DispatchBB->setIsEHPad(true); |
34707 | |
34708 | MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); |
34709 | BuildMI(TrapBB, DL, TII->get(X86::TRAP)); |
34710 | DispatchBB->addSuccessor(TrapBB); |
34711 | |
34712 | MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); |
34713 | DispatchBB->addSuccessor(DispContBB); |
34714 | |
34715 | |
34716 | MF->push_back(DispatchBB); |
34717 | MF->push_back(DispContBB); |
34718 | MF->push_back(TrapBB); |
34719 | |
34720 | |
34721 | |
34722 | SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI); |
34723 | |
34724 | |
34725 | unsigned JTE = getJumpTableEncoding(); |
34726 | MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); |
34727 | unsigned MJTI = JTI->createJumpTableIndex(LPadList); |
34728 | |
34729 | const X86RegisterInfo &RI = TII->getRegisterInfo(); |
34730 | |
34731 | |
34732 | if (RI.hasBasePointer(*MF)) { |
34733 | const bool FPIs64Bit = |
34734 | Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); |
34735 | X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>(); |
34736 | MFI->setRestoreBasePointer(MF); |
34737 | |
34738 | Register FP = RI.getFrameRegister(*MF); |
34739 | Register BP = RI.getBaseRegister(); |
34740 | unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm; |
34741 | addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true, |
34742 | MFI->getRestoreBasePointerOffset()) |
34743 | .addRegMask(RI.getNoPreservedMask()); |
34744 | } else { |
34745 | BuildMI(DispatchBB, DL, TII->get(X86::NOOP)) |
34746 | .addRegMask(RI.getNoPreservedMask()); |
34747 | } |
34748 | |
34749 | |
34750 | Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); |
34751 | addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI, |
34752 | Subtarget.is64Bit() ? 8 : 4); |
34753 | BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) |
34754 | .addReg(IReg) |
34755 | .addImm(LPadList.size()); |
34756 | BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE); |
34757 | |
34758 | if (Subtarget.is64Bit()) { |
34759 | Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass); |
34760 | Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); |
34761 | |
34762 | |
34763 | BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg) |
34764 | .addReg(X86::RIP) |
34765 | .addImm(1) |
34766 | .addReg(0) |
34767 | .addJumpTableIndex(MJTI) |
34768 | .addReg(0); |
34769 | |
34770 | BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64) |
34771 | .addImm(0) |
34772 | .addReg(IReg) |
34773 | .addImm(X86::sub_32bit); |
34774 | |
34775 | switch (JTE) { |
34776 | case MachineJumpTableInfo::EK_BlockAddress: |
34777 | |
34778 | BuildMI(DispContBB, DL, TII->get(X86::JMP64m)) |
34779 | .addReg(BReg) |
34780 | .addImm(8) |
34781 | .addReg(IReg64) |
34782 | .addImm(0) |
34783 | .addReg(0); |
34784 | break; |
34785 | case MachineJumpTableInfo::EK_LabelDifference32: { |
34786 | Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass); |
34787 | Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); |
34788 | Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass); |
34789 | |
34790 | |
34791 | BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg) |
34792 | .addReg(BReg) |
34793 | .addImm(4) |
34794 | .addReg(IReg64) |
34795 | .addImm(0) |
34796 | .addReg(0); |
34797 | |
34798 | BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg); |
34799 | |
34800 | BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg) |
34801 | .addReg(OReg64) |
34802 | .addReg(BReg); |
34803 | |
34804 | BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg); |
34805 | break; |
34806 | } |
34807 | default: |
34808 | llvm_unreachable("Unexpected jump table encoding"); |
34809 | } |
34810 | } else { |
34811 | |
34812 | BuildMI(DispContBB, DL, TII->get(X86::JMP32m)) |
34813 | .addReg(0) |
34814 | .addImm(4) |
34815 | .addReg(IReg) |
34816 | .addJumpTableIndex(MJTI) |
34817 | .addReg(0); |
34818 | } |
34819 | |
34820 | |
34821 | SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs; |
34822 | for (auto &LP : LPadList) |
34823 | if (SeenMBBs.insert(LP).second) |
34824 | DispContBB->addSuccessor(LP); |
34825 | |
34826 | |
34827 | SmallVector<MachineBasicBlock *, 64> MBBLPads; |
34828 | const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); |
34829 | for (MachineBasicBlock *MBB : InvokeBBs) { |
34830 | |
34831 | |
34832 | |
34833 | SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(), |
34834 | MBB->succ_rend()); |
34835 | |
34836 | for (auto MBBS : Successors) { |
34837 | if (MBBS->isEHPad()) { |
34838 | MBB->removeSuccessor(MBBS); |
34839 | MBBLPads.push_back(MBBS); |
34840 | } |
34841 | } |
34842 | |
34843 | MBB->addSuccessor(DispatchBB); |
34844 | |
34845 | |
34846 | |
34847 | |
34848 | |
34849 | for (auto &II : reverse(*MBB)) { |
34850 | if (!II.isCall()) |
34851 | continue; |
34852 | |
34853 | DenseMap<unsigned, bool> DefRegs; |
34854 | for (auto &MOp : II.operands()) |
34855 | if (MOp.isReg()) |
34856 | DefRegs[MOp.getReg()] = true; |
34857 | |
34858 | MachineInstrBuilder MIB(*MF, &II); |
34859 | for (unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) { |
34860 | unsigned Reg = SavedRegs[RegIdx]; |
34861 | if (!DefRegs[Reg]) |
34862 | MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); |
34863 | } |
34864 | |
34865 | break; |
34866 | } |
34867 | } |
34868 | |
34869 | |
34870 | |
34871 | for (auto &LP : MBBLPads) |
34872 | LP->setIsEHPad(false); |
34873 | |
34874 | |
34875 | MI.eraseFromParent(); |
34876 | return BB; |
34877 | } |
34878 | |
// Expand pseudo-instructions marked "usesCustomInserter" into real machine
// instructions. Dispatches on the pseudo opcode and either forwards to a
// dedicated EmitLowered*/emit* helper or expands the pseudo inline before
// erasing it. Returns the basic block in which subsequent instructions
// should be inserted.
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();

  // Map an AMX tile index immediate (0..7) to the matching TMM register.
  auto TMMImmToTMMReg = [](unsigned Imm) {
    assert (Imm < 8 && "Illegal tmm index");
    return X86::TMM0 + Imm;
  };
  switch (MI.getOpcode()) {
  default: llvm_unreachable("Unexpected instr type to insert");
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return EmitLoweredTLSAddr(MI, BB);
  case X86::INDIRECT_THUNK_CALL32:
  case X86::INDIRECT_THUNK_CALL64:
  case X86::INDIRECT_THUNK_TCRETURN32:
  case X86::INDIRECT_THUNK_TCRETURN64:
    return EmitLoweredIndirectThunk(MI, BB);
  case X86::CATCHRET:
    return EmitLoweredCatchRet(MI, BB);
  case X86::SEG_ALLOCA_32:
  case X86::SEG_ALLOCA_64:
    return EmitLoweredSegAlloca(MI, BB);
  case X86::PROBED_ALLOCA_32:
  case X86::PROBED_ALLOCA_64:
    return EmitLoweredProbedAlloca(MI, BB);
  case X86::TLSCall_32:
  case X86::TLSCall_64:
    return EmitLoweredTLSCall(MI, BB);
  case X86::CMOV_FR32:
  case X86::CMOV_FR32X:
  case X86::CMOV_FR64:
  case X86::CMOV_FR64X:
  case X86::CMOV_GR8:
  case X86::CMOV_GR16:
  case X86::CMOV_GR32:
  case X86::CMOV_RFP32:
  case X86::CMOV_RFP64:
  case X86::CMOV_RFP80:
  case X86::CMOV_VR64:
  case X86::CMOV_VR128:
  case X86::CMOV_VR128X:
  case X86::CMOV_VR256:
  case X86::CMOV_VR256X:
  case X86::CMOV_VR512:
  case X86::CMOV_VK1:
  case X86::CMOV_VK2:
  case X86::CMOV_VK4:
  case X86::CMOV_VK8:
  case X86::CMOV_VK16:
  case X86::CMOV_VK32:
  case X86::CMOV_VK64:
    return EmitLoweredSelect(MI, BB);

  case X86::RDFLAGS32:
  case X86::RDFLAGS64: {
    // Read EFLAGS by pushing it onto the stack and popping into the result
    // register.
    unsigned PushF =
        MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
    unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
    MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
    // Permit reads of the EFLAGS and DF registers without them being defined:
    // this intrinsic exists to observe processor state (e.g. trap/interrupt/
    // direction flags) that the backend does not otherwise model, so mark the
    // implicit uses as undef rather than requiring a reaching def.
    assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
           "Unexpected register in operand!");
    Push->getOperand(2).setIsUndef();
    assert(Push->getOperand(3).getReg() == X86::DF &&
           "Unexpected register in operand!");
    Push->getOperand(3).setIsUndef();
    BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());

    MI.eraseFromParent(); // The pseudo is gone now.
    return BB;
  }

  case X86::WRFLAGS32:
  case X86::WRFLAGS64: {
    // Write EFLAGS by pushing the source register and popping into EFLAGS.
    unsigned Push =
        MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
    unsigned PopF =
        MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
    BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
    BuildMI(*BB, MI, DL, TII->get(PopF));

    MI.eraseFromParent(); // The pseudo is gone now.
    return BB;
  }

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // x87 FP-to-int conversion must round toward zero, but the control word
    // may be set otherwise. Save the current FP control word to the stack.
    int OrigCWFrameIdx =
        MF->getFrameInfo().CreateStackObject(2, Align(2), false);
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);

    // Load the old control word into a register...
    Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
                      OrigCWFrameIdx);

    // ...and set the rounding-control bits (RC = 0b11: truncate toward zero).
    Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
    BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
      .addReg(OldCW, RegState::Kill).addImm(0xC00);

    // Extract the low 16 bits of the modified control word.
    Register NewCW16 =
        MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
        .addReg(NewCW, RegState::Kill, X86::sub_16bit);

    // Spill the new control word so FLDCW can load it.
    int NewCWFrameIdx =
        MF->getFrameInfo().CreateStackObject(2, Align(2), false);
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
                      NewCWFrameIdx)
      .addReg(NewCW16, RegState::Kill);

    // Activate the truncating rounding mode.
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FLDCW16m)), NewCWFrameIdx);

    // Pick the matching integer-store opcode for the source width.
    unsigned Opc;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    // Perform the conversion to the memory destination of the pseudo.
    X86AddressMode AM = getAddressFromInstr(&MI, 0);
    addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
        .addReg(MI.getOperand(X86::AddrNumOperands).getReg());

    // Restore the original FP control word.
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FLDCW16m)), OrigCWFrameIdx);

    MI.eraseFromParent(); // The pseudo is gone now.
    return BB;
  }

  // xbegin
  case X86::XBEGIN:
    return emitXBegin(MI, BB, Subtarget.getInstrInfo());

  case X86::VAARG_64:
  case X86::VAARG_X32:
    return EmitVAARGWithCustomInserter(MI, BB);

  case X86::EH_SjLj_SetJmp32:
  case X86::EH_SjLj_SetJmp64:
    return emitEHSjLjSetJmp(MI, BB);

  case X86::EH_SjLj_LongJmp32:
  case X86::EH_SjLj_LongJmp64:
    return emitEHSjLjLongJmp(MI, BB);

  case X86::Int_eh_sjlj_setup_dispatch:
    return EmitSjLjDispatchBlock(MI, BB);

  case TargetOpcode::STATEPOINT:
    // As far as the custom inserter is concerned, STATEPOINT is handled the
    // same way as STACKMAP/PATCHPOINT.
    return emitPatchPoint(MI, BB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, BB);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    // Nothing to expand; XRay patching handles these elsewhere.
    return BB;

  case X86::LCMPXCHG8B: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    // CMPXCHG8B implicitly uses EAX, EBX, ECX and EDX in addition to its
    // memory operand. On i686 with a base pointer (which is ESI), the
    // register allocator cannot satisfy an address of the form
    // X(%reg, %reg, Y) — there simply aren't enough unreserved GPRs left.
    // To avoid over-constraining regalloc we precompute the address into a
    // single register with LEA and rewrite the memory operand to use it.
    // Nothing to do unless we're on 32-bit with a base pointer.
    if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
      return BB;

    // This expansion assumes the base pointer register is ESI; if that ever
    // changes, the logic below must be revisited.
    assert(TRI->getBaseRegister() == X86::ESI &&
           "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
           "base pointer in mind");

    MachineRegisterInfo &MRI = MF->getRegInfo();
    MVT SPTy = getPointerTy(MF->getDataLayout());
    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
    Register computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);

    X86AddressMode AM = getAddressFromInstr(&MI, 0);
    // An address without an index register is simple enough for regalloc;
    // leave it alone.
    if (AM.IndexReg == X86::NoRegister)
      return BB;

    // Place the LEA before any instruction that defines EAX/EBX/ECX/EDX for
    // the CMPXCHG8B itself, so the computed address does not clobber (or get
    // clobbered by) the implicit-register setup.
    MachineBasicBlock::reverse_iterator RMBBI(MI.getReverseIterator());
    while (RMBBI != BB->rend() && (RMBBI->definesRegister(X86::EAX) ||
                                   RMBBI->definesRegister(X86::EBX) ||
                                   RMBBI->definesRegister(X86::ECX) ||
                                   RMBBI->definesRegister(X86::EDX))) {
      ++RMBBI;
    }
    MachineBasicBlock::iterator MBBI(RMBBI);
    addFullAddress(
        BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);

    // Rewrite the pseudo's memory operand to the single precomputed register.
    setDirectAddressInInstr(&MI, 0, computedAddrVReg);

    return BB;
  }
  case X86::LCMPXCHG16B_NO_RBX: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    Register BasePtr = TRI->getBaseRegister();
    if (TRI->hasBasePointer(*MF) &&
        (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
      // RBX doubles as the frame base pointer here: save it, run the
      // save-RBX variant of CMPXCHG16B, and let it restore RBX afterwards.
      if (!BB->isLiveIn(BasePtr))
        BB->addLiveIn(BasePtr);
      // Save RBX into a virtual register.
      Register SaveRBX =
          MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
          .addReg(X86::RBX);
      Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      MachineInstrBuilder MIB =
          BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
      for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
        MIB.add(MI.getOperand(Idx));
      MIB.add(MI.getOperand(X86::AddrNumOperands));
      MIB.addReg(SaveRBX);
    } else {
      // Simple case: copy the desired value into RBX and issue CMPXCHG16B.
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX)
          .add(MI.getOperand(X86::AddrNumOperands));
      MachineInstrBuilder MIB =
          BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B));
      for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
        MIB.add(MI.getOperand(Idx));
    }
    MI.eraseFromParent();
    return BB;
  }
  case X86::MWAITX: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    Register BasePtr = TRI->getBaseRegister();
    bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX);
    // If the base pointer is not RBX/EBX (or there is no base pointer at
    // all), EBX is free to clobber: copy the operands into ECX/EAX/EBX and
    // emit MWAITX directly.
    if (!IsRBX || !TRI->hasBasePointer(*MF)) {
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
          .addReg(MI.getOperand(0).getReg());
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
          .addReg(MI.getOperand(1).getReg());
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EBX)
          .addReg(MI.getOperand(2).getReg());
      BuildMI(*BB, MI, DL, TII->get(X86::MWAITXrrr));
      MI.eraseFromParent();
    } else {
      // RBX is the base pointer; it must be preserved around MWAITX.
      if (!BB->isLiveIn(BasePtr)) {
        BB->addLiveIn(BasePtr);
      }
      // ECX and EAX can be set up normally.
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
          .addReg(MI.getOperand(0).getReg());
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
          .addReg(MI.getOperand(1).getReg());
      assert(Subtarget.is64Bit() && "Expected 64-bit mode!");
      // Save RBX, then use the save-RBX MWAITX pseudo which loads EBX from
      // the operand and restores RBX afterwards.
      Register SaveRBX =
          MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
          .addReg(X86::RBX);
      // Generate mwaitx pseudo.
      Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX))
          .addDef(Dst) // Destination tied in with SaveRBX.
          .addReg(MI.getOperand(2).getReg()) // input value of EBX.
          .addUse(SaveRBX); // Save of base pointer.
      MI.eraseFromParent();
    }
    return BB;
  }
  case TargetOpcode::PREALLOCATED_SETUP: {
    assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
    MFI->setHasPreallocatedCall(true);
    int64_t PreallocatedId = MI.getOperand(0).getImm();
    size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
    assert(StackAdjustment != 0 && "0 stack adjustment");
    LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
                      << StackAdjustment << "\n");
    // Reserve the preallocated argument area by adjusting ESP directly.
    BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
        .addReg(X86::ESP)
        .addImm(StackAdjustment);
    MI.eraseFromParent();
    return BB;
  }
  case TargetOpcode::PREALLOCATED_ARG: {
    assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
    int64_t PreallocatedId = MI.getOperand(1).getImm();
    int64_t ArgIdx = MI.getOperand(2).getImm();
    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
    size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
    LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
                      << ", arg offset " << ArgOffset << "\n");
    // Materialize the argument's address: ESP + the recorded offset.
    addRegOffset(
        BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
        X86::ESP, false, ArgOffset);
    MI.eraseFromParent();
    return BB;
  }
  case X86::PTDPBSSD:
  case X86::PTDPBSUD:
  case X86::PTDPBUSD:
  case X86::PTDPBUUD:
  case X86::PTDPBF16PS: {
    // AMX dot-product pseudos: rewrite tile-index immediates into TMM
    // register operands on the real instruction.
    unsigned Opc;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("illegal opcode!");
    case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
    }

    MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
    // The destination tile is both defined and read (accumulator), so it
    // appears twice: once as a def and once as an (undef) use.
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(2).getImm()), RegState::Undef);

    MI.eraseFromParent(); // The pseudo is gone now.
    return BB;
  }
  case X86::PTILEZERO: {
    // Zero the tile named by the immediate operand.
    unsigned Imm = MI.getOperand(0).getImm();
    BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
    MI.eraseFromParent(); // The pseudo is gone now.
    return BB;
  }
  case X86::PTILELOADD:
  case X86::PTILELOADDT1:
  case X86::PTILESTORED: {
    // AMX tile load/store pseudos: the tile operand is an immediate index
    // that must become a TMM register (a def for loads, a use for stores);
    // the five memory operands are forwarded unchanged.
    unsigned Opc;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("illegal opcode!");
    case X86::PTILELOADD:   Opc = X86::TILELOADD;   break;
    case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
    case X86::PTILESTORED:  Opc = X86::TILESTORED;  break;
    }

    MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
    unsigned CurOp = 0;
    if (Opc != X86::TILESTORED)
      MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
                 RegState::Define);

    MIB.add(MI.getOperand(CurOp++)); // base
    MIB.add(MI.getOperand(CurOp++)); // scale
    MIB.add(MI.getOperand(CurOp++)); // index -- stride
    MIB.add(MI.getOperand(CurOp++)); // displacement
    MIB.add(MI.getOperand(CurOp++)); // segment

    if (Opc == X86::TILESTORED)
      MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
                 RegState::Undef);

    MI.eraseFromParent(); // The pseudo is gone now.
    return BB;
  }
  }
}
35293 | |
35294 | |
35295 | |
35296 | |
35297 | |
35298 | bool |
35299 | X86TargetLowering::targetShrinkDemandedConstant(SDValue Op, |
35300 | const APInt &DemandedBits, |
35301 | const APInt &DemandedElts, |
35302 | TargetLoweringOpt &TLO) const { |
35303 | EVT VT = Op.getValueType(); |
35304 | unsigned Opcode = Op.getOpcode(); |
35305 | unsigned EltSize = VT.getScalarSizeInBits(); |
35306 | |
35307 | if (VT.isVector()) { |
35308 | |
35309 | |
35310 | |
35311 | auto NeedsSignExtension = [&](SDValue V, unsigned ActiveBits) { |
35312 | if (!ISD::isBuildVectorOfConstantSDNodes(V.getNode())) |
35313 | return false; |
35314 | for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) { |
35315 | if (!DemandedElts[i] || V.getOperand(i).isUndef()) |
35316 | continue; |
35317 | const APInt &Val = V.getConstantOperandAPInt(i); |
35318 | if (Val.getBitWidth() > Val.getNumSignBits() && |
35319 | Val.trunc(ActiveBits).getNumSignBits() == ActiveBits) |
35320 | return true; |
35321 | } |
35322 | return false; |
35323 | }; |
35324 | |
35325 | |
35326 | unsigned ActiveBits = DemandedBits.getActiveBits(); |
35327 | if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) && |
35328 | (Opcode == ISD::OR || Opcode == ISD::XOR) && |
35329 | NeedsSignExtension(Op.getOperand(1), ActiveBits)) { |
35330 | EVT ExtSVT = EVT::getIntegerVT(*TLO.DAG.getContext(), ActiveBits); |
35331 | EVT ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtSVT, |
35332 | VT.getVectorNumElements()); |
35333 | SDValue NewC = |
35334 | TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT, |
35335 | Op.getOperand(1), TLO.DAG.getValueType(ExtVT)); |
35336 | SDValue NewOp = |
35337 | TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC); |
35338 | return TLO.CombineTo(Op, NewOp); |
35339 | } |
35340 | return false; |
35341 | } |
35342 | |
35343 | |
35344 | |
35345 | if (Opcode != ISD::AND) |
35346 | return false; |
35347 | |
35348 | |
35349 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
35350 | if (!C) |
35351 | return false; |
35352 | |
35353 | const APInt &Mask = C->getAPIntValue(); |
35354 | |
35355 | |
35356 | APInt ShrunkMask = Mask & DemandedBits; |
35357 | |
35358 | |
35359 | unsigned Width = ShrunkMask.getActiveBits(); |
35360 | |
35361 | |
35362 | if (Width == 0) |
35363 | return false; |
35364 | |
35365 | |
35366 | Width = PowerOf2Ceil(std::max(Width, 8U)); |
35367 | |
35368 | Width = std::min(Width, EltSize); |
35369 | |
35370 | |
35371 | APInt ZeroExtendMask = APInt::getLowBitsSet(EltSize, Width); |
35372 | |
35373 | |
35374 | |
35375 | if (ZeroExtendMask == Mask) |
35376 | return true; |
35377 | |
35378 | |
35379 | |
35380 | if (!ZeroExtendMask.isSubsetOf(Mask | ~DemandedBits)) |
35381 | return false; |
35382 | |
35383 | |
35384 | SDLoc DL(Op); |
35385 | SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT); |
35386 | SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); |
35387 | return TLO.CombineTo(Op, NewOp); |
35388 | } |
35389 | |
// Determine which bits of Op are known to be zero or one for X86-specific
// nodes, writing the result into Known. Falls back to a generic
// target-shuffle analysis for any shuffle opcode not handled explicitly.
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // SETCC produces 0 or 1; everything above bit 0 is zero.
    Known.Zero.setBitsFrom(1);
    break;
  case X86ISD::MOVMSK: {
    // MOVMSK packs one sign bit per source element into the low bits.
    unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
    Known.Zero.setBitsFrom(NumLoBits);
    break;
  }
  case X86ISD::PEXTRB:
  case X86ISD::PEXTRW: {
    // Extract of a single element: known bits of that element, zero-extended
    // to the (wider) scalar result.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
                                            Op.getConstantOperandVal(1));
    Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
    Known = Known.anyextOrTrunc(BitWidth);
    Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
    break;
  }
  case X86ISD::VSRAI:
  case X86ISD::VSHLI:
  case X86ISD::VSRLI: {
    unsigned ShAmt = Op.getConstantOperandVal(1);
    if (ShAmt >= VT.getScalarSizeInBits()) {
      // Out-of-range shifts produce zero for these X86 nodes (unlike
      // generic ISD shifts, which would be undefined).
      Known.setAllZero();
      break;
    }

    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Opc == X86ISD::VSHLI) {
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // Low bits are known zero.
      Known.Zero.setLowBits(ShAmt);
    } else if (Opc == X86ISD::VSRLI) {
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits are known zero.
      Known.Zero.setHighBits(ShAmt);
    } else {
      // VSRAI replicates the sign bit.
      Known.Zero.ashrInPlace(ShAmt);
      Known.One.ashrInPlace(ShAmt);
    }
    break;
  }
  case X86ISD::PACKUS: {
    // PACKUS is just a truncation if the upper half of each input is zero.
    APInt DemandedLHS, DemandedRHS;
    getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);

    // Start from "everything known" at double width and intersect with each
    // demanded input.
    Known.One = APInt::getAllOnesValue(BitWidth * 2);
    Known.Zero = APInt::getAllOnesValue(BitWidth * 2);

    KnownBits Known2;
    if (!!DemandedLHS) {
      Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
      Known = KnownBits::commonBits(Known, Known2);
    }
    if (!!DemandedRHS) {
      Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
      Known = KnownBits::commonBits(Known, Known2);
    }

    // If the upper half isn't provably zero, PACKUS saturates and we know
    // nothing about the result.
    if (Known.countMinLeadingZeros() < BitWidth)
      Known.resetAll();
    Known = Known.trunc(BitWidth);
    break;
  }
  case X86ISD::VBROADCAST: {
    SDValue Src = Op.getOperand(0);
    if (!Src.getSimpleValueType().isVector()) {
      // Broadcast of a scalar: every lane has the scalar's known bits.
      Known = DAG.computeKnownBits(Src, Depth + 1);
      return;
    }
    break;
  }
  case X86ISD::ANDNP: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

    // ANDNP = (~X & Y): a result bit is one only where X is zero and Y is
    // one; it is zero wherever X is one or Y is zero.
    Known.One &= Known2.Zero;
    Known.Zero |= Known2.One;
    break;
  }
  case X86ISD::FOR: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

    Known |= Known2;
    break;
  }
  case X86ISD::PSADBW: {
    assert(VT.getScalarType() == MVT::i64 &&
           Op.getOperand(0).getValueType().getScalarType() == MVT::i8 &&
           "Unexpected PSADBW types");

    // PSADBW sums at most 8 absolute byte differences (max 8*255 = 2040),
    // which fits in 16 bits, so the upper bits of each i64 lane are zero.
    Known.Zero.setBitsFrom(16);
    break;
  }
  case X86ISD::PMULUDQ: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

    // PMULUDQ multiplies the low 32 bits of each i64 lane, zero-extended.
    Known = Known.trunc(BitWidth / 2).zext(BitWidth);
    Known2 = Known2.trunc(BitWidth / 2).zext(BitWidth);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case X86ISD::CMOV: {
    Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);

    // Only known if both arms of the CMOV agree on the bit.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case X86ISD::BEXTR:
  case X86ISD::BEXTRI: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
      // Control operand: bits [7:0] = start bit, bits [15:8] = field length.
      unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
      unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);

      // A zero-length extract yields zero.
      if (Length == 0) {
        Known.setAllZero();
        break;
      }

      // Only analyze fields that fit entirely inside the value; otherwise
      // BEXTR's out-of-range behavior is left unknown here.
      if ((Shift + Length) <= BitWidth) {
        Known = DAG.computeKnownBits(Op0, Depth + 1);
        Known = Known.extractBits(Length, Shift);
        Known = Known.zextOrTrunc(BitWidth);
      }
    }
    break;
  }
  case X86ISD::PDEP: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    // Deposited bits come from the source, so nothing is known-one.
    Known.One.clearAllBits();
    // PDEP only writes where the mask (operand 1) has bits; the result has
    // at least as many trailing zeros as the source (operand 0).
    Known.Zero.setLowBits(Known2.countMinTrailingZeros());
    break;
  }
  case X86ISD::PEXT: {
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // The result has as many leading zeros as the number of zeros in the
    // mask: PEXT compresses one result bit per set mask bit.
    unsigned Count = Known.Zero.countPopulation();
    Known.Zero = APInt::getHighBitsSet(BitWidth, Count);
    Known.One.clearAllBits();
    break;
  }
  case X86ISD::VTRUNC:
  case X86ISD::VTRUNCS:
  case X86ISD::VTRUNCUS:
  case X86ISD::CVTSI2P:
  case X86ISD::CVTUI2P:
  case X86ISD::CVTP2SI:
  case X86ISD::CVTP2UI:
  case X86ISD::MCVTP2SI:
  case X86ISD::MCVTP2UI:
  case X86ISD::CVTTP2SI:
  case X86ISD::CVTTP2UI:
  case X86ISD::MCVTTP2SI:
  case X86ISD::MCVTTP2UI:
  case X86ISD::MCVTSI2P:
  case X86ISD::MCVTUI2P:
  case X86ISD::VFPROUND:
  case X86ISD::VMFPROUND:
  case X86ISD::CVTPS2PH:
  case X86ISD::MCVTPS2PH: {
    // Truncating conversions widen the result vector and zero the elements
    // beyond the source's element count, so if only those upper elements
    // are demanded the result is all zero.
    EVT SrcVT = Op.getOperand(0).getValueType();
    if (SrcVT.isVector()) {
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      if (NumElts > NumSrcElts &&
          DemandedElts.countTrailingZeros() >= NumSrcElts)
        Known.setAllZero();
    }
    break;
  }
  case X86ISD::STRICT_CVTTP2SI:
  case X86ISD::STRICT_CVTTP2UI:
  case X86ISD::STRICT_CVTSI2P:
  case X86ISD::STRICT_CVTUI2P:
  case X86ISD::STRICT_VFPROUND:
  case X86ISD::STRICT_CVTPS2PH: {
    // Same as above, but strict-FP nodes carry the chain in operand 0, so
    // the data source is operand 1.
    EVT SrcVT = Op.getOperand(1).getValueType();
    if (SrcVT.isVector()) {
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      if (NumElts > NumSrcElts &&
          DemandedElts.countTrailingZeros() >= NumSrcElts)
        Known.setAllZero();
    }
    break;
  }
  case X86ISD::MOVQ2DQ: {
    // MOVQ2DQ zeros the upper half of the result vector.
    if (DemandedElts.countTrailingZeros() >= (NumElts / 2))
      Known.setAllZero();
    break;
  }
  }

  // Handle target shuffles not covered above: resolve the shuffle mask and
  // intersect the known bits of every demanded input lane.
  // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
  if (isTargetShuffle(Opc)) {
    SmallVector<int, 64> Mask;
    SmallVector<SDValue, 2> Ops;
    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) {
      unsigned NumOps = Ops.size();
      unsigned NumElts = VT.getVectorNumElements();
      if (Mask.size() == NumElts) {
        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
        Known.Zero.setAllBits(); Known.One.setAllBits();
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i])
            continue;
          int M = Mask[i];
          if (M == SM_SentinelUndef) {
            // For UNDEF elements, we don't know anything about the common
            // state of the shuffle result.
            Known.resetAll();
            break;
          }
          if (M == SM_SentinelZero) {
            // Zero elements contribute known-zero; clear any known-ones.
            Known.One.clearAllBits();
            continue;
          }
          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
                 "Shuffle index out of range");

          unsigned OpIdx = (unsigned)M / NumElts;
          unsigned EltIdx = (unsigned)M % NumElts;
          if (Ops[OpIdx].getValueType() != VT) {
            // TODO - handle target shuffle ops with different value types.
            Known.resetAll();
            break;
          }
          DemandedOps[OpIdx].setBit(EltIdx);
        }
        // Known bits are the values that are shared by every demanded
        // element of every input.
        for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
          if (!DemandedOps[i])
            continue;
          KnownBits Known2 =
              DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
          Known = KnownBits::commonBits(Known, Known2);
        }
      }
    }
  }
}
35677 | |
/// Compute the minimum number of known sign bits for an X86-specific node,
/// for the vector elements selected by \p DemandedElts. Returns at least 1
/// (the sign bit itself); a conservative fallback of 1 is always safe.
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  case X86ISD::SETCC_CARRY:
    // SETCC_CARRY materializes all-ones or all-zeros, so every bit is a copy
    // of the sign bit.
    return VTBits;

  case X86ISD::VTRUNC: {
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    unsigned NumSrcBits = SrcVT.getScalarSizeInBits();
    assert(VTBits < NumSrcBits && "Illegal truncation input type");
    APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
    // Truncation discards the upper (NumSrcBits - VTBits) bits; any source
    // sign bits beyond those survive into the narrower elements.
    if (Tmp > (NumSrcBits - VTBits))
      return Tmp - (NumSrcBits - VTBits);
    return 1;
  }

  case X86ISD::PACKSS: {
    // PACKSS is just a truncation if the sign bits extend to the packed size.
    APInt DemandedLHS, DemandedRHS;
    getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
                        DemandedRHS);

    // Only query an operand if some of its elements are actually demanded.
    unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
    unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
    if (!!DemandedLHS)
      Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
    if (!!DemandedRHS)
      Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
    unsigned Tmp = std::min(Tmp0, Tmp1);
    if (Tmp > (SrcBits - VTBits))
      return Tmp - (SrcBits - VTBits);
    return 1;
  }

  case X86ISD::VBROADCAST: {
    SDValue Src = Op.getOperand(0);
    // Broadcast from a scalar: every element carries the scalar's sign bits.
    if (!Src.getSimpleValueType().isVector())
      return DAG.ComputeNumSignBits(Src, Depth + 1);
    break;
  }

  case X86ISD::VSHLI: {
    SDValue Src = Op.getOperand(0);
    const APInt &ShiftVal = Op.getConstantOperandAPInt(1);
    // Shifting left by the full element width (or more) produces zero, which
    // has all sign bits set.
    if (ShiftVal.uge(VTBits))
      return VTBits;
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
    if (ShiftVal.uge(Tmp))
      return 1;
    // Each shifted position consumes one known sign bit.
    return Tmp - ShiftVal.getZExtValue();
  }

  case X86ISD::VSRAI: {
    SDValue Src = Op.getOperand(0);
    APInt ShiftVal = Op.getConstantOperandAPInt(1);
    // An arithmetic shift by (VTBits - 1) or more leaves only sign copies.
    if (ShiftVal.uge(VTBits - 1))
      return VTBits;
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
    // Each shifted position adds one sign bit, capped at the element width.
    ShiftVal += Tmp;
    return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
  }

  case X86ISD::FSETCC:
    // cmpss/cmpsd return all-ones/zeros in the scalar lane only.
    if (VT == MVT::f32 || VT == MVT::f64 ||
        ((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1))
      return VTBits;
    break;

  case X86ISD::PCMPGT:
  case X86ISD::PCMPEQ:
  case X86ISD::CMPP:
  case X86ISD::VPCOM:
  case X86ISD::VPCOMU:
    // Vector compares return all-ones/all-zeros per element.
    return VTBits;

  case X86ISD::ANDNP: {
    unsigned Tmp0 =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp0 == 1) return 1; // Early out.
    unsigned Tmp1 =
        DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    return std::min(Tmp0, Tmp1);
  }

  case X86ISD::CMOV: {
    unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
    if (Tmp0 == 1) return 1;  // Early out.
    unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
    return std::min(Tmp0, Tmp1);
  }
  }

  // Handle target shuffles: compute the demanded elements of each shuffle
  // operand and take the minimum sign-bit count across them.
  if (isTargetShuffle(Opcode)) {
    SmallVector<int, 64> Mask;
    SmallVector<SDValue, 2> Ops;
    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) {
      unsigned NumOps = Ops.size();
      unsigned NumElts = VT.getVectorNumElements();
      if (Mask.size() == NumElts) {
        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i])
            continue;
          int M = Mask[i];
          if (M == SM_SentinelUndef) {
            // For UNDEF elements we can't say anything about the common state
            // of the shuffle result.
            return 1;
          } else if (M == SM_SentinelZero) {
            // Zero = all sign bits; doesn't lower the running minimum.
            continue;
          }
          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
                 "Shuffle index out of range");

          unsigned OpIdx = (unsigned)M / NumElts;
          unsigned EltIdx = (unsigned)M % NumElts;
          if (Ops[OpIdx].getValueType() != VT) {
            // TODO - handle target shuffle ops with different value types.
            return 1;
          }
          DemandedOps[OpIdx].setBit(EltIdx);
        }
        // Take the minimum sign-bit count over all demanded operands; stop
        // early once we hit the floor of 1.
        unsigned Tmp0 = VTBits;
        for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
          if (!DemandedOps[i])
            continue;
          unsigned Tmp1 =
              DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1);
          Tmp0 = std::min(Tmp0, Tmp1);
        }
        return Tmp0;
      }
    }
  }

  // Fallback case: at least the sign bit itself is known.
  return 1;
}
35828 | |
35829 | SDValue X86TargetLowering::unwrapAddress(SDValue N) const { |
35830 | if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP) |
35831 | return N->getOperand(0); |
35832 | return N; |
35833 | } |
35834 | |
35835 | |
35836 | |
35837 | static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, |
35838 | SelectionDAG &DAG) { |
35839 | |
35840 | if (!LN->isSimple()) |
35841 | return SDValue(); |
35842 | |
35843 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
35844 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
35845 | return DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, SDLoc(LN), Tys, Ops, MemVT, |
35846 | LN->getPointerInfo(), LN->getOriginalAlign(), |
35847 | LN->getMemOperand()->getFlags()); |
35848 | } |
35849 | |
35850 | |
35851 | |
35852 | |
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions. On success, sets Shuffle to the matched opcode and SrcVT/DstVT
// to the types to use (V1 may also be adjusted, e.g. narrowed for extends).
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
                              bool AllowFloatDomain, bool AllowIntDomain,
                              SDValue &V1, const SDLoc &DL, SelectionDAG &DAG,
                              const X86Subtarget &Subtarget, unsigned &Shuffle,
                              MVT &SrcVT, MVT &DstVT) {
  unsigned NumMaskElts = Mask.size();
  unsigned MaskEltSize = MaskVT.getScalarSizeInBits();

  // Match against a VZEXT_MOVL instruction: element 0 kept, the rest zeroed
  // (32-bit elements, or 16-bit with FP16 support).
  if (Mask[0] == 0 &&
      (MaskEltSize == 32 || (MaskEltSize == 16 && Subtarget.hasFP16()))) {
    if ((isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) ||
        (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
      Shuffle = X86ISD::VZEXT_MOVL;
      SrcVT = DstVT =
          !Subtarget.hasSSE2() && MaskEltSize == 32 ? MVT::v4f32 : MaskVT;
      return true;
    }
  }

  // Match against an ANY/ZERO_EXTEND(_VECTOR_INREG) instruction: the mask
  // selects element i*Scale into slot i, with the widened tail undef/zero.
  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
                         (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
    unsigned MaxScale = 64 / MaskEltSize;
    for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
      bool MatchAny = true;
      bool MatchZero = true;
      unsigned NumDstElts = NumMaskElts / Scale;
      for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) {
        if (!isUndefOrEqual(Mask[i * Scale], (int)i)) {
          MatchAny = MatchZero = false;
          break;
        }
        // Tail of each group: undef-only allows any-extend, undef-or-zero
        // allows zero-extend.
        MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1);
        MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
      }
      if (MatchAny || MatchZero) {
        assert(MatchZero && "Failed to match zext but matched aext?");
        unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
        MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
                                            MVT::getIntegerVT(MaskEltSize);
        SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);

        // Only the low subvector of V1 is needed as the extend source.
        if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
          V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);

        Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
        if (SrcVT.getVectorNumElements() != NumDstElts)
          Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle);

        DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
        DstVT = MVT::getVectorVT(DstVT, NumDstElts);
        return true;
      }
    }
  }

  // Match against a VZEXT_MOVL instruction, SSE scalar variant.
  if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
      isUndefOrEqual(Mask[0], 0) &&
      isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
    Shuffle = X86ISD::VZEXT_MOVL;
    SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
    return true;
  }

  // Check for SSE3 duplication shuffles (MOVDDUP/MOVSLDUP/MOVSHDUP), 128-bit.
  if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v2f64;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
  }

  // 256-bit duplication shuffles (AVX).
  if (MaskVT.is256BitVector() && AllowFloatDomain) {
    assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v4f64;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v8f32;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v8f32;
      return true;
    }
  }

  // 512-bit duplication shuffles (AVX512).
  if (MaskVT.is512BitVector() && AllowFloatDomain) {
    assert(Subtarget.hasAVX512() &&
           "AVX512 required for 512-bit vector shuffles");
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v8f64;
      return true;
    }
    if (isTargetShuffleEquivalent(
            MaskVT, Mask,
            {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v16f32;
      return true;
    }
    if (isTargetShuffleEquivalent(
            MaskVT, Mask,
            {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v16f32;
      return true;
    }
  }

  return false;
}
35987 | |
35988 | |
35989 | |
35990 | |
// Attempt to match a combined shuffle mask against supported unary immediate
// permute instructions. On success, sets Shuffle/ShuffleVT and the immediate
// operand value in PermuteImm.
static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                     const APInt &Zeroable,
                                     bool AllowFloatDomain, bool AllowIntDomain,
                                     const X86Subtarget &Subtarget,
                                     unsigned &Shuffle, MVT &ShuffleVT,
                                     unsigned &PermuteImm) {
  unsigned NumMaskElts = Mask.size();
  unsigned InputSizeInBits = MaskVT.getSizeInBits();
  unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
  MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
  bool ContainsZeros = isAnyZero(Mask);

  // Handle 64-bit element permutes (VPERMI / VPERMILPI).
  if (!ContainsZeros && MaskScalarSizeInBits == 64) {
    // Lane-crossing permutes need VPERMQ/VPERMPD (AVX2) or their 512-bit
    // AVX512 forms with a 256-bit repeated mask.
    if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
      if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) {
        Shuffle = X86ISD::VPERMI;
        ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
        PermuteImm = getV4X86ShuffleImm(Mask);
        return true;
      }
      if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) {
        SmallVector<int, 4> RepeatedMask;
        if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
          Shuffle = X86ISD::VPERMI;
          ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
          PermuteImm = getV4X86ShuffleImm(RepeatedMask);
          return true;
        }
      }
    } else if (AllowFloatDomain && Subtarget.hasAVX()) {
      // In-lane 64-bit permute: VPERMILPD, one immediate bit per element.
      Shuffle = X86ISD::VPERMILPI;
      ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
      PermuteImm = 0;
      for (int i = 0, e = Mask.size(); i != e; ++i) {
        int M = Mask[i];
        if (M == SM_SentinelUndef)
          continue;
        assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
        PermuteImm |= (M & 1) << i;
      }
      return true;
    }
  }

  // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
  // Masks with 64-bit elements are widened to 32-bit mask elements first.
  if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
      !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
    SmallVector<int, 4> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
      // Narrow the repeated mask to create 32-bit element permutes.
      SmallVector<int, 4> WordMask = RepeatedMask;
      if (MaskScalarSizeInBits == 64)
        narrowShuffleMaskElts(2, RepeatedMask, WordMask);

      Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
      ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
      ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
      PermuteImm = getV4X86ShuffleImm(WordMask);
      return true;
    }
  }

  // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
  if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
       (MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
    SmallVector<int, 4> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
      ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
      ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);

      // PSHUFLW: permute lower 4 elements, upper 4 stay in place (identity).
      if (isUndefOrInRange(LoMask, 0, 4) &&
          isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
        Shuffle = X86ISD::PSHUFLW;
        ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
        PermuteImm = getV4X86ShuffleImm(LoMask);
        return true;
      }

      // PSHUFHW: permute upper 4 elements, lower 4 stay in place (identity).
      if (isUndefOrInRange(HiMask, 4, 8) &&
          isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
        // Offset the HiMask so that we can create the shuffle immediate.
        int OffsetHiMask[4];
        for (int i = 0; i != 4; ++i)
          OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);

        Shuffle = X86ISD::PSHUFHW;
        ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
        PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
        return true;
      }
    }
  }

  // Attempt to match the mask against byte/bit shifts.
  if (AllowIntDomain &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
    int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
                                       Mask, 0, Zeroable, Subtarget);
    // 512-bit shifts with sub-32-bit elements additionally require BWI.
    if (0 < ShiftAmt && (!ShuffleVT.is512BitVector() || Subtarget.hasBWI() ||
                         32 <= ShuffleVT.getScalarSizeInBits())) {
      PermuteImm = (unsigned)ShiftAmt;
      return true;
    }
  }

  // Attempt to match the mask against bit rotates (XOP 128-bit or AVX512).
  if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
      ((MaskVT.is128BitVector() && Subtarget.hasXOP()) ||
       Subtarget.hasAVX512())) {
    int RotateAmt = matchShuffleAsBitRotate(ShuffleVT, MaskScalarSizeInBits,
                                            Subtarget, Mask);
    if (0 < RotateAmt) {
      Shuffle = X86ISD::VROTLI;
      PermuteImm = (unsigned)RotateAmt;
      return true;
    }
  }

  return false;
}
36123 | |
36124 | |
36125 | |
36126 | |
// Attempt to match a combined shuffle mask against supported binary (two
// input) shuffle instructions. May commute/replace V1 and V2 to fit the
// matched instruction; sets Shuffle and SrcVT/DstVT on success.
static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
                               bool AllowFloatDomain, bool AllowIntDomain,
                               SDValue &V1, SDValue &V2, const SDLoc &DL,
                               SelectionDAG &DAG, const X86Subtarget &Subtarget,
                               unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
                               bool IsUnary) {
  unsigned NumMaskElts = Mask.size();
  unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();

  if (MaskVT.is128BitVector()) {
    // {0, 0}: duplicate the low half - MOVLHPS (or UNPCKL with SSE2).
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) {
      V2 = V1;
      V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
      Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
      SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
      return true;
    }
    // {1, 1}: duplicate the high half - MOVHLPS (or UNPCKH with SSE2).
    if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) {
      V2 = V1;
      Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
      SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
      return true;
    }
    // {0, 3}: MOVSD after swapping the operands.
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) &&
        Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) {
      std::swap(V1, V2);
      Shuffle = X86ISD::MOVSD;
      SrcVT = DstVT = MVT::v2f64;
      return true;
    }
    // {4, 1, 2, 3}: MOVSS - scalar element 0 from V2, rest from V1.
    if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) &&
        (AllowFloatDomain || !Subtarget.hasSSE41())) {
      Shuffle = X86ISD::MOVSS;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
    // {8, 1, ..., 7}: MOVSH - FP16 scalar move (requires FP16 support).
    if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}) &&
        Subtarget.hasFP16()) {
      Shuffle = X86ISD::MOVSH;
      SrcVT = DstVT = MVT::v8f16;
      return true;
    }
  }

  // Attempt to match against either an unary or binary PACKSS/PACKUS shuffle.
  if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
      ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
      ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
    if (matchShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
                             Subtarget)) {
      DstVT = MaskVT;
      return true;
    }
  }

  // Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
  if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
      (MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
      (MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
      (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
      (MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
    if (matchShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, DAG,
                              Subtarget)) {
      SrcVT = DstVT = MaskVT;
      // Without AVX2, 256-bit unpacks only exist in the FP domain.
      if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
        SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
      return true;
    }
  }

  // Attempt to match the blend against a bitwise OR: valid if, for every
  // demanded lane, the operand NOT selected by the mask is known zero.
  if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
      (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
    bool IsBlend = true;
    unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
    unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
    unsigned Scale1 = NumV1Elts / NumMaskElts;
    unsigned Scale2 = NumV2Elts / NumMaskElts;
    APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
    APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
    for (unsigned i = 0; i != NumMaskElts; ++i) {
      int M = Mask[i];
      if (M == SM_SentinelUndef)
        continue;
      if (M == SM_SentinelZero) {
        // Zeroed lane: both operands must be zero here.
        DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
        DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
        continue;
      }
      if (M == (int)i) {
        // Lane comes from V1: V2 must be zero here.
        DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
        continue;
      }
      if (M == (int)(i + NumMaskElts)) {
        // Lane comes from V2: V1 must be zero here.
        DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
        continue;
      }
      // Anything else isn't a plain per-lane blend.
      IsBlend = false;
      break;
    }
    if (IsBlend &&
        DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
        DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
      Shuffle = ISD::OR;
      SrcVT = DstVT = MaskVT.changeTypeToInteger();
      return true;
    }
  }

  return false;
}
36239 | |
// Attempt to match a combined shuffle mask against supported binary immediate
// permute instructions. May replace V1/V2 (e.g. with zero vectors) to fit the
// matched instruction; sets Shuffle, ShuffleVT and PermuteImm on success.
static bool matchBinaryPermuteShuffle(
    MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable,
    bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2,
    const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget,
    unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) {
  unsigned NumMaskElts = Mask.size();
  unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();

  // Attempt to match against a VALIGND/VALIGNQ element rotate.
  if (AllowIntDomain && (EltSizeInBits == 64 || EltSizeInBits == 32) &&
      ((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
       (MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
    if (!isAnyZero(Mask)) {
      int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
      if (0 < Rotation) {
        Shuffle = X86ISD::VALIGN;
        if (EltSizeInBits == 64)
          ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
        else
          ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
        PermuteImm = Rotation;
        return true;
      }
    }
  }

  // Attempt to match against a PALIGNR byte rotate.
  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
                         (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
                         (MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
    int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask);
    if (0 < ByteRotation) {
      Shuffle = X86ISD::PALIGNR;
      ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
      PermuteImm = ByteRotation;
      return true;
    }
  }

  // Attempt to match against a BLENDI immediate blend.
  if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
                            (Subtarget.hasAVX() && MaskVT.is256BitVector()))) ||
      (MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) {
    uint64_t BlendMask = 0;
    bool ForceV1Zero = false, ForceV2Zero = false;
    SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
    if (matchShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
                            ForceV2Zero, BlendMask)) {
      if (MaskVT == MVT::v16i16) {
        // vpblendw uses an 8-bit immediate that repeats per 128-bit lane, so
        // the mask must be 128-bit repeating.
        SmallVector<int, 8> RepeatedMask;
        if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask,
                                        RepeatedMask)) {
          assert(RepeatedMask.size() == 8 &&
                 "Repeated mask size doesn't match!");
          PermuteImm = 0;
          for (int i = 0; i < 8; ++i)
            if (RepeatedMask[i] >= 8)
              PermuteImm |= 1 << i;
          V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
          V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
          Shuffle = X86ISD::BLENDI;
          ShuffleVT = MaskVT;
          return true;
        }
      } else {
        V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
        V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
        PermuteImm = (unsigned)BlendMask;
        Shuffle = X86ISD::BLENDI;
        ShuffleVT = MaskVT;
        return true;
      }
    }
  }

  // Attempt to match against an INSERTPS, but only when the mask contains
  // zeroed elements (the more general INSERTPS match is retried at the end).
  if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
      MaskVT.is128BitVector() && isAnyZero(Mask) &&
      matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
    Shuffle = X86ISD::INSERTPS;
    ShuffleVT = MVT::v4f32;
    return true;
  }

  // Attempt to match against a SHUFPD.
  if (AllowFloatDomain && EltSizeInBits == 64 &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
    bool ForceV1Zero = false, ForceV2Zero = false;
    if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero,
                               PermuteImm, Mask, Zeroable)) {
      V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
      V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
      Shuffle = X86ISD::SHUFP;
      ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
      return true;
    }
  }

  // Attempt to match against a SHUFPS.
  if (AllowFloatDomain && EltSizeInBits == 32 &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE1()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
    SmallVector<int, 4> RepeatedMask;
    if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
      // Match each half of the repeated mask to one source (undef, zero, V1
      // or V2), producing the two S0/S1 entries of the SHUFPS immediate.
      auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
        int M0 = RepeatedMask[Offset];
        int M1 = RepeatedMask[Offset + 1];

        if (isUndefInRange(RepeatedMask, Offset, 2)) {
          return DAG.getUNDEF(MaskVT);
        } else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) {
          S0 = (SM_SentinelUndef == M0 ? -1 : 0);
          S1 = (SM_SentinelUndef == M1 ? -1 : 1);
          return getZeroVector(MaskVT, Subtarget, DAG, DL);
        } else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) {
          S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
          S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
          return V1;
        } else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) {
          S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
          S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
          return V2;
        }

        // Mixed sources within a half - can't be a SHUFPS.
        return SDValue();
      };

      int ShufMask[4] = {-1, -1, -1, -1};
      SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
      SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);

      if (Lo && Hi) {
        V1 = Lo;
        V2 = Hi;
        Shuffle = X86ISD::SHUFP;
        ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32);
        PermuteImm = getV4X86ShuffleImm(ShufMask);
        return true;
      }
    }
  }

  // Attempt to match against an INSERTPS more generally (no zeroed elements
  // required this time).
  if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
      MaskVT.is128BitVector() &&
      matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
    Shuffle = X86ISD::INSERTPS;
    ShuffleVT = MVT::v4f32;
    return true;
  }

  return false;
}
36401 | |
// Forward declaration - presumably defined later in this file (it is a static
// function, so its definition must live in this translation unit).
static SDValue combineX86ShuffleChainWithExtract(
    ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
    bool HasVariableMask, bool AllowVariableCrossLaneMask,
    bool AllowVariablePerLaneMask, SelectionDAG &DAG,
    const X86Subtarget &Subtarget);
36407 | |
36408 | |
36409 | |
36410 | |
36411 | |
36412 | |
36413 | |
36414 | |
36415 | |
36416 | |
36417 | static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, |
36418 | ArrayRef<int> BaseMask, int Depth, |
36419 | bool HasVariableMask, |
36420 | bool AllowVariableCrossLaneMask, |
36421 | bool AllowVariablePerLaneMask, |
36422 | SelectionDAG &DAG, |
36423 | const X86Subtarget &Subtarget) { |
36424 | assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); |
36425 | assert((Inputs.size() == 1 || Inputs.size() == 2) && |
36426 | "Unexpected number of shuffle inputs!"); |
36427 | |
36428 | MVT RootVT = Root.getSimpleValueType(); |
36429 | unsigned RootSizeInBits = RootVT.getSizeInBits(); |
36430 | unsigned NumRootElts = RootVT.getVectorNumElements(); |
36431 | |
36432 | |
36433 | |
36434 | auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) { |
36435 | return DAG.getBitcast(VT, Op); |
36436 | }; |
36437 | |
36438 | |
36439 | |
36440 | bool UnaryShuffle = (Inputs.size() == 1); |
36441 | SDValue V1 = peekThroughBitcasts(Inputs[0]); |
36442 | SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType()) |
36443 | : peekThroughBitcasts(Inputs[1])); |
36444 | |
36445 | MVT VT1 = V1.getSimpleValueType(); |
36446 | MVT VT2 = V2.getSimpleValueType(); |
36447 | assert(VT1.getSizeInBits() == RootSizeInBits && |
36448 | VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch"); |
36449 | |
36450 | SDLoc DL(Root); |
36451 | SDValue Res; |
36452 | |
36453 | unsigned NumBaseMaskElts = BaseMask.size(); |
36454 | if (NumBaseMaskElts == 1) { |
36455 | assert(BaseMask[0] == 0 && "Invalid shuffle index found!"); |
36456 | return CanonicalizeShuffleInput(RootVT, V1); |
36457 | } |
36458 | |
36459 | bool OptForSize = DAG.shouldOptForSize(); |
36460 | unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; |
36461 | bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || |
36462 | (RootVT.isFloatingPoint() && Depth >= 1) || |
36463 | (RootVT.is256BitVector() && !Subtarget.hasAVX2()); |
36464 | |
36465 | |
36466 | |
36467 | |
36468 | bool IsMaskedShuffle = false; |
36469 | if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) { |
36470 | if (Root.hasOneUse() && Root->use_begin()->getOpcode() == ISD::VSELECT && |
36471 | Root->use_begin()->getOperand(0).getScalarValueSizeInBits() == 1) { |
36472 | IsMaskedShuffle = true; |
36473 | } |
36474 | } |
36475 | |
36476 | |
36477 | |
36478 | |
36479 | if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) && |
36480 | (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && |
36481 | V1.getValueSizeInBits() >= RootSizeInBits) { |
36482 | return CanonicalizeShuffleInput(RootVT, V1); |
36483 | } |
36484 | |
36485 | SmallVector<int, 64> Mask(BaseMask.begin(), BaseMask.end()); |
36486 | |
36487 | |
36488 | |
36489 | if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits) { |
36490 | SmallVector<int> ScaledMask, IdentityMask; |
36491 | unsigned NumElts = VT1.getVectorNumElements(); |
36492 | if (Mask.size() <= NumElts && |
36493 | scaleShuffleElements(Mask, NumElts, ScaledMask)) { |
36494 | for (unsigned i = 0; i != NumElts; ++i) |
36495 | IdentityMask.push_back(i); |
36496 | if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2)) |
36497 | return CanonicalizeShuffleInput(RootVT, V1); |
36498 | } |
36499 | } |
36500 | |
36501 | |
36502 | if (RootVT.is512BitVector() && |
36503 | (NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) { |
36504 | |
36505 | |
36506 | |
36507 | if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) { |
36508 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
36509 | return SDValue(); |
36510 | assert(isInRange(Mask[0], 0, NumBaseMaskElts) && |
36511 | "Unexpected lane shuffle"); |
36512 | Res = CanonicalizeShuffleInput(RootVT, V1); |
36513 | unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts); |
36514 | bool UseZero = isAnyZero(Mask); |
36515 | Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits); |
36516 | return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits); |
36517 | } |
36518 | |
36519 | |
36520 | SmallVector<int, 4> ScaledMask; |
36521 | assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size"); |
36522 | narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, Mask, ScaledMask); |
36523 | |
36524 | |
36525 | auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, |
36526 | ArrayRef<int> ScaledMask, SDValue V1, SDValue V2, |
36527 | SelectionDAG &DAG) { |
36528 | unsigned PermMask = 0; |
36529 | |
36530 | SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)}; |
36531 | for (int i = 0; i < 4; ++i) { |
36532 | assert(ScaledMask[i] >= -1 && "Illegal shuffle sentinel value"); |
36533 | if (ScaledMask[i] < 0) |
36534 | continue; |
36535 | |
36536 | SDValue Op = ScaledMask[i] >= 4 ? V2 : V1; |
36537 | unsigned OpIndex = i / 2; |
36538 | if (Ops[OpIndex].isUndef()) |
36539 | Ops[OpIndex] = Op; |
36540 | else if (Ops[OpIndex] != Op) |
36541 | return SDValue(); |
36542 | |
36543 | |
36544 | |
36545 | |
36546 | PermMask |= (ScaledMask[i] % 4) << (i * 2); |
36547 | } |
36548 | |
36549 | return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT, |
36550 | CanonicalizeShuffleInput(ShuffleVT, Ops[0]), |
36551 | CanonicalizeShuffleInput(ShuffleVT, Ops[1]), |
36552 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
36553 | }; |
36554 | |
36555 | |
36556 | |
36557 | |
36558 | bool PreferPERMQ = UnaryShuffle && isUndefOrInRange(ScaledMask[0], 0, 2) && |
36559 | isUndefOrInRange(ScaledMask[1], 0, 2) && |
36560 | isUndefOrInRange(ScaledMask[2], 2, 4) && |
36561 | isUndefOrInRange(ScaledMask[3], 2, 4) && |
36562 | (ScaledMask[0] < 0 || ScaledMask[2] < 0 || |
36563 | ScaledMask[0] == (ScaledMask[2] % 2)) && |
36564 | (ScaledMask[1] < 0 || ScaledMask[3] < 0 || |
36565 | ScaledMask[1] == (ScaledMask[3] % 2)); |
36566 | |
36567 | if (!isAnyZero(ScaledMask) && !PreferPERMQ) { |
36568 | if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) |
36569 | return SDValue(); |
36570 | MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64); |
36571 | if (SDValue V = MatchSHUF128(ShuffleVT, DL, ScaledMask, V1, V2, DAG)) |
36572 | return DAG.getBitcast(RootVT, V); |
36573 | } |
36574 | } |
36575 | |
36576 | |
36577 | if (RootVT.is256BitVector() && NumBaseMaskElts == 2) { |
36578 | |
36579 | |
36580 | |
36581 | if (isUndefOrZero(Mask[1])) { |
36582 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
36583 | return SDValue(); |
36584 | assert(isInRange(Mask[0], 0, 2) && "Unexpected lane shuffle"); |
36585 | Res = CanonicalizeShuffleInput(RootVT, V1); |
36586 | Res = extract128BitVector(Res, Mask[0] * (NumRootElts / 2), DAG, DL); |
36587 | return widenSubVector(Res, Mask[1] == SM_SentinelZero, Subtarget, DAG, DL, |
36588 | 256); |
36589 | } |
36590 | |
36591 | |
36592 | |
36593 | |
36594 | if (Mask[0] == 0 && Mask[1] == 0 && !Subtarget.hasAVX2()) { |
36595 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
36596 | return SDValue(); |
36597 | Res = CanonicalizeShuffleInput(RootVT, V1); |
36598 | Res = extractSubVector(Res, 0, DAG, DL, 128); |
36599 | return concatSubVectors(Res, Res, DAG, DL); |
36600 | } |
36601 | |
36602 | if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) |
36603 | return SDValue(); |
36604 | |
36605 | |
36606 | |
36607 | |
36608 | if (UnaryShuffle && |
36609 | !(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) && |
36610 | (OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) { |
36611 | unsigned PermMask = 0; |
36612 | PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0); |
36613 | PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4); |
36614 | return DAG.getNode( |
36615 | X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1), |
36616 | DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
36617 | } |
36618 | |
36619 | if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) |
36620 | return SDValue(); |
36621 | |
36622 | |
36623 | if (!UnaryShuffle && !IsMaskedShuffle) { |
36624 | assert(llvm::all_of(Mask, [](int M) { return 0 <= M && M < 4; }) && |
36625 | "Unexpected shuffle sentinel value"); |
36626 | |
36627 | if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) { |
36628 | unsigned PermMask = 0; |
36629 | PermMask |= ((Mask[0] & 3) << 0); |
36630 | PermMask |= ((Mask[1] & 3) << 4); |
36631 | SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2; |
36632 | SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2; |
36633 | return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT, |
36634 | CanonicalizeShuffleInput(RootVT, LHS), |
36635 | CanonicalizeShuffleInput(RootVT, RHS), |
36636 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
36637 | } |
36638 | } |
36639 | } |
36640 | |
36641 | |
36642 | |
36643 | if (BaseMaskEltSizeInBits > 64) { |
36644 | assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size"); |
36645 | int MaskScale = BaseMaskEltSizeInBits / 64; |
36646 | SmallVector<int, 64> ScaledMask; |
36647 | narrowShuffleMaskElts(MaskScale, Mask, ScaledMask); |
36648 | Mask = std::move(ScaledMask); |
36649 | } |
36650 | |
36651 | |
36652 | |
36653 | |
36654 | if (IsMaskedShuffle && NumRootElts > Mask.size()) { |
36655 | assert((NumRootElts % Mask.size()) == 0 && "Illegal mask size"); |
36656 | int MaskScale = NumRootElts / Mask.size(); |
36657 | SmallVector<int, 64> ScaledMask; |
36658 | narrowShuffleMaskElts(MaskScale, Mask, ScaledMask); |
36659 | Mask = std::move(ScaledMask); |
36660 | } |
36661 | |
36662 | unsigned NumMaskElts = Mask.size(); |
36663 | unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts; |
36664 | |
36665 | |
36666 | FloatDomain &= (32 <= MaskEltSizeInBits); |
36667 | MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits) |
36668 | : MVT::getIntegerVT(MaskEltSizeInBits); |
36669 | MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts); |
36670 | |
36671 | |
36672 | if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) |
36673 | return SDValue(); |
36674 | |
36675 | |
36676 | MVT ShuffleSrcVT, ShuffleVT; |
36677 | unsigned Shuffle, PermuteImm; |
36678 | |
36679 | |
36680 | |
36681 | |
36682 | bool AllowFloatDomain = FloatDomain || (Depth >= 3); |
36683 | bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() && |
36684 | (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); |
36685 | |
36686 | |
36687 | APInt KnownUndef, KnownZero; |
36688 | resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); |
36689 | APInt Zeroable = KnownUndef | KnownZero; |
36690 | |
36691 | if (UnaryShuffle) { |
36692 | |
36693 | |
36694 | if ((Subtarget.hasAVX2() || |
36695 | (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) && |
36696 | (!IsMaskedShuffle || NumRootElts == NumMaskElts)) { |
36697 | if (isUndefOrEqual(Mask, 0)) { |
36698 | if (V1.getValueType() == MaskVT && |
36699 | V1.getOpcode() == ISD::SCALAR_TO_VECTOR && |
36700 | MayFoldLoad(V1.getOperand(0))) { |
36701 | if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) |
36702 | return SDValue(); |
36703 | Res = V1.getOperand(0); |
36704 | Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); |
36705 | return DAG.getBitcast(RootVT, Res); |
36706 | } |
36707 | if (Subtarget.hasAVX2()) { |
36708 | if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) |
36709 | return SDValue(); |
36710 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
36711 | Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); |
36712 | return DAG.getBitcast(RootVT, Res); |
36713 | } |
36714 | } |
36715 | } |
36716 | |
36717 | SDValue NewV1 = V1; |
36718 | if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1, |
36719 | DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, |
36720 | ShuffleVT) && |
36721 | (!IsMaskedShuffle || |
36722 | (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36723 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36724 | return SDValue(); |
36725 | Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); |
36726 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); |
36727 | return DAG.getBitcast(RootVT, Res); |
36728 | } |
36729 | |
36730 | if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, |
36731 | AllowIntDomain, Subtarget, Shuffle, ShuffleVT, |
36732 | PermuteImm) && |
36733 | (!IsMaskedShuffle || |
36734 | (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36735 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36736 | return SDValue(); |
36737 | Res = CanonicalizeShuffleInput(ShuffleVT, V1); |
36738 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, |
36739 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36740 | return DAG.getBitcast(RootVT, Res); |
36741 | } |
36742 | } |
36743 | |
36744 | |
36745 | |
36746 | |
36747 | if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 && |
36748 | Subtarget.hasSSE41() && |
36749 | !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) { |
36750 | if (MaskEltSizeInBits == 32) { |
36751 | SDValue SrcV1 = V1, SrcV2 = V2; |
36752 | if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, |
36753 | DAG) && |
36754 | SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) { |
36755 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) |
36756 | return SDValue(); |
36757 | Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, |
36758 | CanonicalizeShuffleInput(MVT::v4f32, SrcV1), |
36759 | CanonicalizeShuffleInput(MVT::v4f32, SrcV2), |
36760 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36761 | return DAG.getBitcast(RootVT, Res); |
36762 | } |
36763 | } |
36764 | if (MaskEltSizeInBits == 64 && |
36765 | isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) && |
36766 | V2.getOpcode() == ISD::SCALAR_TO_VECTOR && |
36767 | V2.getScalarValueSizeInBits() <= 32) { |
36768 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) |
36769 | return SDValue(); |
36770 | PermuteImm = (2 << 4) | (0 << 0); |
36771 | Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, |
36772 | CanonicalizeShuffleInput(MVT::v4f32, V1), |
36773 | CanonicalizeShuffleInput(MVT::v4f32, V2), |
36774 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36775 | return DAG.getBitcast(RootVT, Res); |
36776 | } |
36777 | } |
36778 | |
36779 | SDValue NewV1 = V1; |
36780 | SDValue NewV2 = V2; |
36781 | if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1, |
36782 | NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, |
36783 | ShuffleVT, UnaryShuffle) && |
36784 | (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36785 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36786 | return SDValue(); |
36787 | NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); |
36788 | NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2); |
36789 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); |
36790 | return DAG.getBitcast(RootVT, Res); |
36791 | } |
36792 | |
36793 | NewV1 = V1; |
36794 | NewV2 = V2; |
36795 | if (matchBinaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, |
36796 | AllowIntDomain, NewV1, NewV2, DL, DAG, |
36797 | Subtarget, Shuffle, ShuffleVT, PermuteImm) && |
36798 | (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36799 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36800 | return SDValue(); |
36801 | NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1); |
36802 | NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2); |
36803 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, |
36804 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36805 | return DAG.getBitcast(RootVT, Res); |
36806 | } |
36807 | |
36808 | |
36809 | MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits); |
36810 | IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts); |
36811 | |
36812 | |
36813 | if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) { |
36814 | uint64_t BitLen, BitIdx; |
36815 | if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, |
36816 | Zeroable)) { |
36817 | if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) |
36818 | return SDValue(); |
36819 | V1 = CanonicalizeShuffleInput(IntMaskVT, V1); |
36820 | Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, |
36821 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
36822 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
36823 | return DAG.getBitcast(RootVT, Res); |
36824 | } |
36825 | |
36826 | if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { |
36827 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) |
36828 | return SDValue(); |
36829 | V1 = CanonicalizeShuffleInput(IntMaskVT, V1); |
36830 | V2 = CanonicalizeShuffleInput(IntMaskVT, V2); |
36831 | Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, |
36832 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
36833 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
36834 | return DAG.getBitcast(RootVT, Res); |
36835 | } |
36836 | } |
36837 | |
36838 | |
36839 | if (AllowIntDomain && MaskEltSizeInBits < 64 && Subtarget.hasAVX512()) { |
36840 | |
36841 | if (matchShuffleAsVTRUNC(ShuffleSrcVT, ShuffleVT, IntMaskVT, Mask, Zeroable, |
36842 | Subtarget)) { |
36843 | bool IsTRUNCATE = ShuffleVT.getVectorNumElements() == |
36844 | ShuffleSrcVT.getVectorNumElements(); |
36845 | unsigned Opc = |
36846 | IsTRUNCATE ? (unsigned)ISD::TRUNCATE : (unsigned)X86ISD::VTRUNC; |
36847 | if (Depth == 0 && Root.getOpcode() == Opc) |
36848 | return SDValue(); |
36849 | V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); |
36850 | Res = DAG.getNode(Opc, DL, ShuffleVT, V1); |
36851 | if (ShuffleVT.getSizeInBits() < RootSizeInBits) |
36852 | Res = widenSubVector(Res, true, Subtarget, DAG, DL, RootSizeInBits); |
36853 | return DAG.getBitcast(RootVT, Res); |
36854 | } |
36855 | |
36856 | |
36857 | if (RootSizeInBits < 512 && |
36858 | ((RootVT.is256BitVector() && Subtarget.useAVX512Regs()) || |
36859 | (RootVT.is128BitVector() && Subtarget.hasVLX())) && |
36860 | (MaskEltSizeInBits > 8 || Subtarget.hasBWI()) && |
36861 | isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) { |
36862 | if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE) |
36863 | return SDValue(); |
36864 | ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); |
36865 | ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2); |
36866 | V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); |
36867 | V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2); |
36868 | ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); |
36869 | ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts); |
36870 | Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShuffleSrcVT, V1, V2); |
36871 | Res = DAG.getNode(ISD::TRUNCATE, DL, IntMaskVT, Res); |
36872 | return DAG.getBitcast(RootVT, Res); |
36873 | } |
36874 | } |
36875 | |
36876 | |
36877 | |
36878 | if (Depth < 1) |
36879 | return SDValue(); |
36880 | |
36881 | |
36882 | int VariableCrossLaneShuffleDepth = |
36883 | Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2; |
36884 | int VariablePerLaneShuffleDepth = |
36885 | Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2; |
36886 | AllowVariableCrossLaneMask &= |
36887 | (Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask; |
36888 | AllowVariablePerLaneMask &= |
36889 | (Depth >= VariablePerLaneShuffleDepth) || HasVariableMask; |
36890 | |
36891 | |
36892 | bool AllowBWIVPERMV3 = |
36893 | (Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask); |
36894 | |
36895 | bool MaskContainsZeros = isAnyZero(Mask); |
36896 | |
36897 | if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) { |
36898 | |
36899 | if (UnaryShuffle && AllowVariableCrossLaneMask && !MaskContainsZeros) { |
36900 | if (Subtarget.hasAVX2() && |
36901 | (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) { |
36902 | SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); |
36903 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
36904 | Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); |
36905 | return DAG.getBitcast(RootVT, Res); |
36906 | } |
36907 | |
36908 | if ((Subtarget.hasAVX512() && |
36909 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
36910 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || |
36911 | (Subtarget.hasBWI() && |
36912 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
36913 | (Subtarget.hasVBMI() && |
36914 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) { |
36915 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36916 | V2 = DAG.getUNDEF(MaskVT); |
36917 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
36918 | return DAG.getBitcast(RootVT, Res); |
36919 | } |
36920 | } |
36921 | |
36922 | |
36923 | |
36924 | if (UnaryShuffle && AllowVariableCrossLaneMask && |
36925 | ((Subtarget.hasAVX512() && |
36926 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
36927 | MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || |
36928 | MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || |
36929 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || |
36930 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
36931 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
36932 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
36933 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { |
36934 | |
36935 | for (unsigned i = 0; i != NumMaskElts; ++i) |
36936 | if (Mask[i] == SM_SentinelZero) |
36937 | Mask[i] = NumMaskElts + i; |
36938 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36939 | V2 = getZeroVector(MaskVT, Subtarget, DAG, DL); |
36940 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
36941 | return DAG.getBitcast(RootVT, Res); |
36942 | } |
36943 | |
36944 | |
36945 | |
36946 | if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( |
36947 | Inputs, Root, BaseMask, Depth, HasVariableMask, |
36948 | AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, |
36949 | Subtarget)) |
36950 | return WideShuffle; |
36951 | |
36952 | |
36953 | |
36954 | if (AllowVariableCrossLaneMask && !MaskContainsZeros && |
36955 | ((Subtarget.hasAVX512() && |
36956 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
36957 | MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || |
36958 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 || |
36959 | MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || |
36960 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
36961 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
36962 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
36963 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { |
36964 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36965 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
36966 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
36967 | return DAG.getBitcast(RootVT, Res); |
36968 | } |
36969 | return SDValue(); |
36970 | } |
36971 | |
36972 | |
36973 | |
36974 | if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask && |
36975 | isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) && |
36976 | DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) { |
36977 | APInt Zero = APInt::getNullValue(MaskEltSizeInBits); |
36978 | APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits); |
36979 | APInt UndefElts(NumMaskElts, 0); |
36980 | SmallVector<APInt, 64> EltBits(NumMaskElts, Zero); |
36981 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
36982 | int M = Mask[i]; |
36983 | if (M == SM_SentinelUndef) { |
36984 | UndefElts.setBit(i); |
36985 | continue; |
36986 | } |
36987 | if (M == SM_SentinelZero) |
36988 | continue; |
36989 | EltBits[i] = AllOnes; |
36990 | } |
36991 | SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL); |
36992 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
36993 | unsigned AndOpcode = |
36994 | MaskVT.isFloatingPoint() ? unsigned(X86ISD::FAND) : unsigned(ISD::AND); |
36995 | Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask); |
36996 | return DAG.getBitcast(RootVT, Res); |
36997 | } |
36998 | |
36999 | |
37000 | |
37001 | |
37002 | if (UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros && |
37003 | ((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) || |
37004 | (MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) { |
37005 | SmallVector<SDValue, 16> VPermIdx; |
37006 | for (int M : Mask) { |
37007 | SDValue Idx = |
37008 | M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32); |
37009 | VPermIdx.push_back(Idx); |
37010 | } |
37011 | SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx); |
37012 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
37013 | Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask); |
37014 | return DAG.getBitcast(RootVT, Res); |
37015 | } |
37016 | |
37017 | |
37018 | |
37019 | if (AllowVariablePerLaneMask && Subtarget.hasXOP() && |
37020 | (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 || |
37021 | MaskVT == MVT::v8f32)) { |
37022 | |
37023 | |
37024 | |
37025 | |
37026 | unsigned NumLanes = MaskVT.getSizeInBits() / 128; |
37027 | unsigned NumEltsPerLane = NumMaskElts / NumLanes; |
37028 | SmallVector<int, 8> VPerm2Idx; |
37029 | unsigned M2ZImm = 0; |
37030 | for (int M : Mask) { |
37031 | if (M == SM_SentinelUndef) { |
37032 | VPerm2Idx.push_back(-1); |
37033 | continue; |
37034 | } |
37035 | if (M == SM_SentinelZero) { |
37036 | M2ZImm = 2; |
37037 | VPerm2Idx.push_back(8); |
37038 | continue; |
37039 | } |
37040 | int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane); |
37041 | Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index); |
37042 | VPerm2Idx.push_back(Index); |
37043 | } |
37044 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
37045 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
37046 | SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); |
37047 | Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, |
37048 | DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); |
37049 | return DAG.getBitcast(RootVT, Res); |
37050 | } |
37051 | |
37052 | |
37053 | |
37054 | |
37055 | |
37056 | |
37057 | if (UnaryShuffle && AllowVariablePerLaneMask && |
37058 | ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) || |
37059 | (RootVT.is256BitVector() && Subtarget.hasAVX2()) || |
37060 | (RootVT.is512BitVector() && Subtarget.hasBWI()))) { |
37061 | SmallVector<SDValue, 16> PSHUFBMask; |
37062 | int NumBytes = RootVT.getSizeInBits() / 8; |
37063 | int Ratio = NumBytes / NumMaskElts; |
37064 | for (int i = 0; i < NumBytes; ++i) { |
37065 | int M = Mask[i / Ratio]; |
37066 | if (M == SM_SentinelUndef) { |
37067 | PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8)); |
37068 | continue; |
37069 | } |
37070 | if (M == SM_SentinelZero) { |
37071 | PSHUFBMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); |
37072 | continue; |
37073 | } |
37074 | M = Ratio * M + i % Ratio; |
37075 | assert((M / 16) == (i / 16) && "Lane crossing detected"); |
37076 | PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); |
37077 | } |
37078 | MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); |
37079 | Res = CanonicalizeShuffleInput(ByteVT, V1); |
37080 | SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask); |
37081 | Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp); |
37082 | return DAG.getBitcast(RootVT, Res); |
37083 | } |
37084 | |
37085 | |
37086 | |
37087 | |
37088 | if (AllowVariablePerLaneMask && RootVT.is128BitVector() && |
37089 | Subtarget.hasXOP()) { |
37090 | |
37091 | |
37092 | |
37093 | SmallVector<SDValue, 16> VPPERMMask; |
37094 | int NumBytes = 16; |
37095 | int Ratio = NumBytes / NumMaskElts; |
37096 | for (int i = 0; i < NumBytes; ++i) { |
37097 | int M = Mask[i / Ratio]; |
37098 | if (M == SM_SentinelUndef) { |
37099 | VPPERMMask.push_back(DAG.getUNDEF(MVT::i8)); |
37100 | continue; |
37101 | } |
37102 | if (M == SM_SentinelZero) { |
37103 | VPPERMMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); |
37104 | continue; |
37105 | } |
37106 | M = Ratio * M + i % Ratio; |
37107 | VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8)); |
37108 | } |
37109 | MVT ByteVT = MVT::v16i8; |
37110 | V1 = CanonicalizeShuffleInput(ByteVT, V1); |
37111 | V2 = CanonicalizeShuffleInput(ByteVT, V2); |
37112 | SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask); |
37113 | Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp); |
37114 | return DAG.getBitcast(RootVT, Res); |
37115 | } |
37116 | |
37117 | |
37118 | |
37119 | if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( |
37120 | Inputs, Root, BaseMask, Depth, HasVariableMask, |
37121 | AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget)) |
37122 | return WideShuffle; |
37123 | |
37124 | |
37125 | |
37126 | if (!UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros && |
37127 | ((Subtarget.hasAVX512() && |
37128 | (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 || |
37129 | MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 || |
37130 | MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 || |
37131 | MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || |
37132 | MaskVT == MVT::v16i32)) || |
37133 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
37134 | (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || |
37135 | MaskVT == MVT::v32i16)) || |
37136 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
37137 | (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || |
37138 | MaskVT == MVT::v64i8)))) { |
37139 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
37140 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
37141 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
37142 | return DAG.getBitcast(RootVT, Res); |
37143 | } |
37144 | |
37145 | |
37146 | return SDValue(); |
37147 | } |
37148 | |
37149 | |
37150 | |
37151 | |
37152 | |
37153 | |
37154 | |
37155 | |
37156 | |
// Combine a chain of shuffles whose inputs are EXTRACT_SUBVECTORs of wider
// vectors: peek through the extractions, rebuild the shuffle mask in terms of
// the wide sources, retry the shuffle combine at the wider width, and finally
// extract the root-sized result back out. Returns an empty SDValue if no
// profitable wide combine was found.
static SDValue combineX86ShuffleChainWithExtract(
    ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
    bool HasVariableMask, bool AllowVariableCrossLaneMask,
    bool AllowVariablePerLaneMask, SelectionDAG &DAG,
    const X86Subtarget &Subtarget) {
  unsigned NumMaskElts = BaseMask.size();
  unsigned NumInputs = Inputs.size();
  if (NumInputs == 0)
    return SDValue();

  EVT RootVT = Root.getValueType();
  unsigned RootSizeInBits = RootVT.getSizeInBits();
  assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");

  // Working copies: the (possibly widened) sources, and for each source the
  // subvector offset it was extracted from, measured in mask elements.
  SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
  SmallVector<unsigned, 4> Offsets(NumInputs, 0);

  // Peek through any EXTRACT_SUBVECTOR chains, accumulating the element
  // offset and tracking the widest source encountered.
  // NOTE(review): only bitcasts *before* the extract chain are peeked
  // through; interleaved EXTRACT_SUBVECTOR + BITCAST is not handled here.
  unsigned WideSizeInBits = RootSizeInBits;
  for (unsigned i = 0; i != NumInputs; ++i) {
    SDValue &Src = WideInputs[i];
    unsigned &Offset = Offsets[i];
    Src = peekThroughBitcasts(Src);
    EVT BaseVT = Src.getValueType();
    while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
      Offset += Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
    }
    WideSizeInBits = std::max(WideSizeInBits,
                              (unsigned)Src.getValueSizeInBits());
    // Convert the offset from BaseVT elements into mask-element units.
    assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
           "Unexpected subvector extraction");
    Offset /= BaseVT.getVectorNumElements();
    Offset *= NumMaskElts;
  }

  // If every input extracts from offset 0, the current-width combine
  // (combineX86ShuffleChain) already covers this case — bail out.
  if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
    return SDValue();

  unsigned Scale = WideSizeInBits / RootSizeInBits;
  assert((WideSizeInBits % RootSizeInBits) == 0 &&
         "Unexpected subvector extraction");

  // All wide sources must be type-legal and share the same scalar type so
  // they can be widened to a common vector type below.
  // TODO: smaller scalar types could potentially be extended to match.
  EVT WideSVT = WideInputs[0].getValueType().getScalarType();
  if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) {
        return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) ||
               Op.getValueType().getScalarType() != WideSVT;
      }))
    return SDValue();

  // Widen every input up to WideSizeInBits (upper elements undef).
  for (SDValue &NewInput : WideInputs) {
    assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 &&
           "Shuffle vector size mismatch");
    if (WideSizeInBits > NewInput.getValueSizeInBits())
      NewInput = widenSubVector(NewInput, false, Subtarget, DAG,
                                SDLoc(NewInput), WideSizeInBits);
    assert(WideSizeInBits == NewInput.getValueSizeInBits() &&
           "Unexpected subvector extraction");
  }

  // Bias each input's offset so that input i indexes its own Scale-sized
  // slot in the concatenated wide mask space.
  for (unsigned i = 1; i != NumInputs; ++i)
    Offsets[i] += i * Scale * NumMaskElts;

  // Remap the base mask into the wide mask space and pad the tail with
  // undefs so the mask covers the full wide width.
  SmallVector<int, 64> WideMask(BaseMask.begin(), BaseMask.end());
  for (int &M : WideMask) {
    if (M < 0)
      continue;
    M = (M % NumMaskElts) + Offsets[M / NumMaskElts];
  }
  WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);

  // Remove unused/repeated shuffle source ops.
  resolveTargetShuffleInputsAndMask(WideInputs, WideMask);
  assert(!WideInputs.empty() && "Shuffle with no inputs detected");

  // The wide combine only handles unary/binary shuffles.
  if (WideInputs.size() > 2)
    return SDValue();

  // Increase the combine depth to account for each upper subvector we peeked
  // through, so the recursion budget reflects the extra work absorbed here.
  Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; });

  // Attempt to combine the wider chain; on success, extract the root-sized
  // low subvector of the result and bitcast back to the root type.
  // TODO: Can we use a better Root?
  SDValue WideRoot = WideInputs[0];
  if (SDValue WideShuffle =
          combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
                                 HasVariableMask, AllowVariableCrossLaneMask,
                                 AllowVariablePerLaneMask, DAG, Subtarget)) {
    WideShuffle =
        extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
    return DAG.getBitcast(RootVT, WideShuffle);
  }
  return SDValue();
}
37258 | |
37259 | |
37260 | |
37261 | static SDValue canonicalizeShuffleMaskWithHorizOp( |
37262 | MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask, |
37263 | unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG, |
37264 | const X86Subtarget &Subtarget) { |
37265 | if (Mask.empty() || Ops.empty()) |
37266 | return SDValue(); |
37267 | |
37268 | SmallVector<SDValue> BC; |
37269 | for (SDValue Op : Ops) |
37270 | BC.push_back(peekThroughBitcasts(Op)); |
37271 | |
37272 | |
37273 | SDValue BC0 = BC[0]; |
37274 | EVT VT0 = BC0.getValueType(); |
37275 | unsigned Opcode0 = BC0.getOpcode(); |
37276 | if (VT0.getSizeInBits() != RootSizeInBits || llvm::any_of(BC, [&](SDValue V) { |
37277 | return V.getOpcode() != Opcode0 || V.getValueType() != VT0; |
37278 | })) |
37279 | return SDValue(); |
37280 | |
37281 | bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD || |
37282 | Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB); |
37283 | bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS); |
37284 | if (!isHoriz && !isPack) |
37285 | return SDValue(); |
37286 | |
37287 | |
37288 | bool OneUseOps = llvm::all_of(Ops, [](SDValue Op) { |
37289 | return Op.hasOneUse() && |
37290 | peekThroughBitcasts(Op) == peekThroughOneUseBitcasts(Op); |
37291 | }); |
37292 | |
37293 | int NumElts = VT0.getVectorNumElements(); |
37294 | int NumLanes = VT0.getSizeInBits() / 128; |
37295 | int NumEltsPerLane = NumElts / NumLanes; |
37296 | int NumHalfEltsPerLane = NumEltsPerLane / 2; |
37297 | MVT SrcVT = BC0.getOperand(0).getSimpleValueType(); |
37298 | unsigned EltSizeInBits = RootSizeInBits / Mask.size(); |
37299 | |
37300 | if (NumEltsPerLane >= 4 && |
37301 | (isPack || shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget))) { |
37302 | SmallVector<int> LaneMask, ScaledMask; |
37303 | if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, LaneMask) && |
37304 | scaleShuffleElements(LaneMask, 4, ScaledMask)) { |
37305 | |
37306 | |
37307 | |
37308 | |
37309 | if (isHoriz) { |
37310 | |
37311 | auto GetHOpSrc = [&](int M) { |
37312 | if (M == SM_SentinelUndef) |
37313 | return DAG.getUNDEF(VT0); |
37314 | if (M == SM_SentinelZero) |
37315 | return getZeroVector(VT0.getSimpleVT(), Subtarget, DAG, DL); |
37316 | SDValue Src0 = BC[M / 4]; |
37317 | SDValue Src1 = Src0.getOperand((M % 4) >= 2); |
37318 | if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode())) |
37319 | return Src1.getOperand(M % 2); |
37320 | return SDValue(); |
37321 | }; |
37322 | SDValue M0 = GetHOpSrc(ScaledMask[0]); |
37323 | SDValue M1 = GetHOpSrc(ScaledMask[1]); |
37324 | SDValue M2 = GetHOpSrc(ScaledMask[2]); |
37325 | SDValue M3 = GetHOpSrc(ScaledMask[3]); |
37326 | if (M0 && M1 && M2 && M3) { |
37327 | SDValue LHS = DAG.getNode(Opcode0, DL, SrcVT, M0, M1); |
37328 | SDValue RHS = DAG.getNode(Opcode0, DL, SrcVT, M2, M3); |
37329 | return DAG.getNode(Opcode0, DL, VT0, LHS, RHS); |
37330 | } |
37331 | } |
37332 | |
37333 | if (Ops.size() >= 2) { |
37334 | SDValue LHS, RHS; |
37335 | auto GetHOpSrc = [&](int M, int &OutM) { |
37336 | |
37337 | if (M < 0) |
37338 | return M == SM_SentinelUndef; |
37339 | SDValue Src = BC[M / 4].getOperand((M % 4) >= 2); |
37340 | if (!LHS || LHS == Src) { |
37341 | LHS = Src; |
37342 | OutM = (M % 2); |
37343 | return true; |
37344 | } |
37345 | if (!RHS || RHS == Src) { |
37346 | RHS = Src; |
37347 | OutM = (M % 2) + 2; |
37348 | return true; |
37349 | } |
37350 | return false; |
37351 | }; |
37352 | int PostMask[4] = {-1, -1, -1, -1}; |
37353 | if (GetHOpSrc(ScaledMask[0], PostMask[0]) && |
37354 | GetHOpSrc(ScaledMask[1], PostMask[1]) && |
37355 | GetHOpSrc(ScaledMask[2], PostMask[2]) && |
37356 | GetHOpSrc(ScaledMask[3], PostMask[3])) { |
37357 | LHS = DAG.getBitcast(SrcVT, LHS); |
37358 | RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS); |
37359 | SDValue Res = DAG.getNode(Opcode0, DL, VT0, LHS, RHS); |
37360 | |
37361 | |
37362 | MVT ShuffleVT = MVT::getVectorVT(MVT::f32, RootSizeInBits / 32); |
37363 | Res = DAG.getBitcast(ShuffleVT, Res); |
37364 | return DAG.getNode(X86ISD::SHUFP, DL, ShuffleVT, Res, Res, |
37365 | getV4X86ShuffleImm8ForMask(PostMask, DL, DAG)); |
37366 | } |
37367 | } |
37368 | } |
37369 | } |
37370 | |
37371 | if (2 < Ops.size()) |
37372 | return SDValue(); |
37373 | |
37374 | SDValue BC1 = BC[BC.size() - 1]; |
37375 | if (Mask.size() == VT0.getVectorNumElements()) { |
37376 | |
37377 | |
37378 | |
37379 | if (Ops.size() == 2) { |
37380 | auto ContainsOps = [](SDValue HOp, SDValue Op) { |
37381 | return Op == HOp.getOperand(0) || Op == HOp.getOperand(1); |
37382 | }; |
37383 | |
37384 | if (ContainsOps(BC1, BC0.getOperand(0)) && |
37385 | ContainsOps(BC1, BC0.getOperand(1))) { |
37386 | ShuffleVectorSDNode::commuteMask(Mask); |
37387 | std::swap(Ops[0], Ops[1]); |
37388 | std::swap(BC0, BC1); |
37389 | } |
37390 | |
37391 | |
37392 | if (ContainsOps(BC0, BC1.getOperand(0)) && |
37393 | ContainsOps(BC0, BC1.getOperand(1))) { |
37394 | for (int &M : Mask) { |
37395 | if (M < NumElts) |
37396 | continue; |
37397 | int SubLane = ((M % NumEltsPerLane) >= NumHalfEltsPerLane) ? 1 : 0; |
37398 | M -= NumElts + (SubLane * NumHalfEltsPerLane); |
37399 | if (BC1.getOperand(SubLane) != BC0.getOperand(0)) |
37400 | M += NumHalfEltsPerLane; |
37401 | } |
37402 | } |
37403 | } |
37404 | |
37405 | |
37406 | for (int i = 0; i != NumElts; ++i) { |
37407 | int &M = Mask[i]; |
37408 | if (isUndefOrZero(M)) |
37409 | continue; |
37410 | if (M < NumElts && BC0.getOperand(0) == BC0.getOperand(1) && |
37411 | (M % NumEltsPerLane) >= NumHalfEltsPerLane) |
37412 | M -= NumHalfEltsPerLane; |
37413 | if (NumElts <= M && BC1.getOperand(0) == BC1.getOperand(1) && |
37414 | (M % NumEltsPerLane) >= NumHalfEltsPerLane) |
37415 | M -= NumHalfEltsPerLane; |
37416 | } |
37417 | } |
37418 | |
37419 | |
37420 | |
37421 | |
37422 | SmallVector<int, 16> TargetMask128, WideMask128; |
37423 | if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, TargetMask128) && |
37424 | scaleShuffleElements(TargetMask128, 2, WideMask128)) { |
37425 | assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle"); |
37426 | bool SingleOp = (Ops.size() == 1); |
37427 | if (isPack || OneUseOps || |
37428 | shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) { |
37429 | SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1; |
37430 | SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1; |
37431 | Lo = Lo.getOperand(WideMask128[0] & 1); |
37432 | Hi = Hi.getOperand(WideMask128[1] & 1); |
37433 | if (SingleOp) { |
37434 | SDValue Undef = DAG.getUNDEF(SrcVT); |
37435 | SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL); |
37436 | Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo); |
37437 | Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi); |
37438 | Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo); |
37439 | Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi); |
37440 | } |
37441 | return DAG.getNode(Opcode0, DL, VT0, Lo, Hi); |
37442 | } |
37443 | } |
37444 | |
37445 | return SDValue(); |
37446 | } |
37447 | |
37448 | |
37449 | |
37450 | |
// Attempt to constant fold a shuffle whose source operands are all constant,
// returning the shuffled constant vector (bitcast to the root type), or
// SDValue() if folding isn't possible/profitable.
static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
                                           ArrayRef<int> Mask, SDValue Root,
                                           bool HasVariableMask,
                                           SelectionDAG &DAG,
                                           const X86Subtarget &Subtarget) {
  MVT VT = Root.getSimpleValueType();

  unsigned SizeInBits = VT.getSizeInBits();
  unsigned NumMaskElts = Mask.size();
  unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
  unsigned NumOps = Ops.size();

  // Extract constant bits from every source op - bail if any op isn't a
  // constant we can decode at MaskSizeInBits granularity.
  bool OneUseConstantOp = false;
  SmallVector<APInt, 16> UndefEltsOps(NumOps);
  SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue SrcOp = Ops[i];
    OneUseConstantOp |= SrcOp.hasOneUse();
    if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
                                       RawBitsOps[i]))
      return SDValue();
  }

  // Only fold if at least one of the constants is only used once, or if the
  // combine has already committed to a variable shuffle mask (in which case
  // folding to a new constant is presumably still a win - TODO confirm the
  // original rationale).
  if (!OneUseConstantOp && !HasVariableMask)
    return SDValue();

  // Shuffle the constant bits according to the mask, classifying each result
  // element as undef, zero or a concrete constant.
  SDLoc DL(Root);
  APInt UndefElts(NumMaskElts, 0);
  APInt ZeroElts(NumMaskElts, 0);
  APInt ConstantElts(NumMaskElts, 0);
  SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
                                        APInt::getNullValue(MaskSizeInBits));
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    int M = Mask[i];
    if (M == SM_SentinelUndef) {
      UndefElts.setBit(i);
      continue;
    } else if (M == SM_SentinelZero) {
      ZeroElts.setBit(i);
      continue;
    }
    assert(0 <= M && M < (int)(NumMaskElts * NumOps));

    // Decompose the mask index into the source op and the element within it.
    unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
    unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;

    // A referenced undef source element yields an undef result element.
    auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
    if (SrcUndefElts[SrcMaskIdx]) {
      UndefElts.setBit(i);
      continue;
    }

    // All-zero source bits yield a zero result element.
    auto &SrcEltBits = RawBitsOps[SrcOpIdx];
    APInt &Bits = SrcEltBits[SrcMaskIdx];
    if (!Bits) {
      ZeroElts.setBit(i);
      continue;
    }

    ConstantElts.setBit(i);
    ConstantBitData[i] = Bits;
  }
  assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());

  // If every element is undef or zero, emit a zero vector directly.
  if ((UndefElts | ZeroElts).isAllOnesValue())
    return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);

  // Choose the constant's element type: use a matching floating-point element
  // for fp vectors with 32/64-bit elements, otherwise an integer element.
  MVT MaskSVT;
  if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
    MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits);
  else
    MaskSVT = MVT::getIntegerVT(MaskSizeInBits);

  // Bail if the folded constant's type isn't legal for this target.
  MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
  if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
    return SDValue();

  SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
  return DAG.getBitcast(VT, CstOp);
}
37538 | |
namespace llvm {
namespace X86 {
/// Upper bound on the recursion depth used when combining x86 target
/// shuffles (see combineX86ShufflesRecursively).
enum { MaxShuffleCombineDepth = 8 };
} // namespace X86
} // namespace llvm
37546 | |
37547 | |
37548 | |
37549 | |
37550 | |
37551 | |
37552 | |
37553 | |
37554 | |
37555 | |
37556 | |
37557 | |
37558 | |
37559 | |
37560 | |
37561 | |
37562 | |
37563 | |
37564 | |
37565 | |
37566 | |
37567 | |
37568 | |
37569 | |
37570 | |
37571 | |
37572 | |
37573 | |
37574 | |
37575 | |
/// Fully generic combining of x86 shuffle instructions.
///
/// Starting from the shuffle at SrcOps[SrcOpIndex], merge its mask into the
/// accumulated RootMask, then recursively attempt to combine the resulting
/// shuffle sequence rooted at Root into fewer/cheaper instructions.
static SDValue combineX86ShufflesRecursively(
    ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
    ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
    unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask,
    bool AllowVariablePerLaneMask, SelectionDAG &DAG,
    const X86Subtarget &Subtarget) {
  assert(RootMask.size() > 0 &&
         (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
         "Illegal shuffle root mask");
  assert(Root.getSimpleValueType().isVector() &&
         "Shuffles operate on vector types!");
  unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits();

  // Bound the depth of our recursive combine - it can get expensive if
  // allowed to run unbounded.
  if (Depth >= MaxDepth)
    return SDValue();

  // Directly rip through single-use bitcasts to find the underlying operand.
  SDValue Op = SrcOps[SrcOpIndex];
  Op = peekThroughOneUseBitcasts(Op);

  EVT VT = Op.getValueType();
  // Bail on non-vector or non-simple types.
  if (!VT.isVector() || !VT.isSimple())
    return SDValue();

  // FIXME: f16 element vectors are not handled - bail for now.
  if (VT.getVectorElementType() == MVT::f16)
    return SDValue();

  assert((RootSizeInBits % VT.getSizeInBits()) == 0 &&
         "Can only combine shuffles upto size of the root op.");

  // Extract this op's target shuffle mask, its inputs, and the per-element
  // undef/zero information.
  SmallVector<int, 64> OpMask;
  SmallVector<SDValue, 2> OpInputs;
  APInt OpUndef, OpZero;
  APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
  bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
  if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
                              OpZero, DAG, Depth, false))
    return SDValue();

  // Shuffle inputs must not be larger than the shuffle result itself.
  if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {
        return OpInput.getValueSizeInBits() > VT.getSizeInBits();
      }))
    return SDValue();

  // If this shuffle is narrower than the root, widen its mask to root width:
  // rebase multi-input indices into the padded index space and pad the mask
  // tail with undefs.
  if (RootSizeInBits > VT.getSizeInBits()) {
    unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits();
    unsigned OpMaskSize = OpMask.size();
    if (OpInputs.size() > 1) {
      unsigned PaddedMaskSize = NumSubVecs * OpMaskSize;
      for (int &M : OpMask) {
        if (M < 0)
          continue;
        int EltIdx = M % OpMaskSize;
        int OpIdx = M / OpMaskSize;
        M = (PaddedMaskSize * OpIdx) + EltIdx;
      }
    }
    OpZero = OpZero.zext(NumSubVecs * OpMaskSize);
    OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize);
    OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef);
  }

  SmallVector<int, 64> Mask;
  SmallVector<SDValue, 16> Ops;

  // An "empty" root (depth 0, identity single-element root mask) means we
  // don't need to merge masks - this op's mask becomes the accumulated mask.
  bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1);
  if (EmptyRoot) {
    // Only resolve known-zero elements into sentinels if that would still
    // leave some input unused - otherwise resolving can't remove an input
    // and risks the combine looping without progress.
    bool ResolveKnownZeros = true;
    if (!OpZero.isNullValue()) {
      APInt UsedInputs = APInt::getNullValue(OpInputs.size());
      for (int i = 0, e = OpMask.size(); i != e; ++i) {
        int M = OpMask[i];
        if (OpUndef[i] || OpZero[i] || isUndefOrZero(M))
          continue;
        UsedInputs.setBit(M / OpMask.size());
        if (UsedInputs.isAllOnesValue()) {
          ResolveKnownZeros = false;
          break;
        }
      }
    }
    resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero,
                                      ResolveKnownZeros);

    Mask = OpMask;
    Ops.append(OpInputs.begin(), OpInputs.end());
  } else {
    resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero);

    // Start from the existing source ops; new inputs are added (or replace
    // the slot we're expanding) below.
    Ops.append(SrcOps.begin(), SrcOps.end());

    // Returns the Ops index for Input, reusing an existing match (modulo
    // bitcasts), replacing the slot at InsertionPoint if given, or appending.
    auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
      // Attempt to find an existing match.
      SDValue InputBC = peekThroughBitcasts(Input);
      for (int i = 0, e = Ops.size(); i < e; ++i)
        if (InputBC == peekThroughBitcasts(Ops[i]))
          return i;
      // No match - replace the op being expanded if requested.
      if (InsertionPoint >= 0) {
        Ops[InsertionPoint] = Input;
        return InsertionPoint;
      }
      // Otherwise add to the end of the Ops list.
      Ops.push_back(Input);
      return Ops.size() - 1;
    };

    // The first input takes over the slot of the op being expanded
    // (SrcOpIndex); further inputs are appended.
    SmallVector<int, 2> OpInputIdx;
    for (SDValue OpInput : OpInputs)
      OpInputIdx.push_back(
          AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));

    assert(((RootMask.size() > OpMask.size() &&
             RootMask.size() % OpMask.size() == 0) ||
            (OpMask.size() > RootMask.size() &&
             OpMask.size() % RootMask.size() == 0) ||
            OpMask.size() == RootMask.size()) &&
           "The smaller number of elements must divide the larger.");

    // Mask sizes are powers of 2, which lets the div/rem operations below be
    // replaced with shifts and bitmasks.
    assert(isPowerOf2_32(RootMask.size()) &&
           "Non-power-of-2 shuffle mask sizes");
    assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
    unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
    unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());

    // Work at the wider of the two mask widths; exactly one of the two masks
    // gets scaled up by its ratio.
    unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
    unsigned RootRatio =
        std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
    unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
    assert((RootRatio == 1 || OpRatio == 1) &&
           "Must not have a ratio for both incoming and op masks!");

    assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
    assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
    assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
    unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
    unsigned OpRatioLog2 = countTrailingZeros(OpRatio);

    Mask.resize(MaskWidth, SM_SentinelUndef);

    // Merge this shuffle op's mask into the accumulated mask. Conceptually
    // this op's mask is applied first, then the root mask, since we are
    // recursing *up* the operand chain: for each root mask slot, follow the
    // root index into this op's mask and substitute where it points into one
    // of this op's inputs.
    for (unsigned i = 0; i < MaskWidth; ++i) {
      unsigned RootIdx = i >> RootRatioLog2;
      if (RootMask[RootIdx] < 0) {
        // Root element is an undef/zero sentinel - propagate it directly.
        Mask[i] = RootMask[RootIdx];
        continue;
      }

      unsigned RootMaskedIdx =
          RootRatio == 1
              ? RootMask[RootIdx]
              : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));

      // If the root index references an input other than the op currently
      // being expanded, keep the scaled root index as-is.
      if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
          (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
        Mask[i] = RootMaskedIdx;
        continue;
      }

      // Index within the expanded op - map through this op's mask.
      RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
      unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
      if (OpMask[OpIdx] < 0) {
        // The op element is an undef/zero sentinel - propagate it.
        Mask[i] = OpMask[OpIdx];
        continue;
      }

      // Non-sentinel: compute the scaled index into this op's inputs.
      unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx]
                                          : (OpMask[OpIdx] << OpRatioLog2) +
                                                (RootMaskedIdx & (OpRatio - 1));

      // Rebase onto whichever Ops slot this input landed in.
      OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
      int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
      assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
      OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;

      Mask[i] = OpMaskedIdx;
    }
  }

  // Remove unused/repeated shuffle source ops.
  resolveTargetShuffleInputsAndMask(Ops, Mask);

  // Handle the all-undef, all-undef-or-zero and all-ones cases early.
  if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
    return DAG.getUNDEF(Root.getValueType());
  if (all_of(Mask, [](int Idx) { return Idx < 0; }))
    return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
                         SDLoc(Root));
  if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) &&
      none_of(Mask, [](int M) { return M == SM_SentinelZero; }))
    return getOnesVector(Root.getValueType(), DAG, SDLoc(Root));

  assert(!Ops.empty() && "Shuffle with no inputs detected");
  HasVariableMask |= IsOpVariableMask;

  // Update the list of shuffle nodes that have been combined so far.
  SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
                                                SrcNodes.end());
  CombinedNodes.push_back(Op.getNode());

  // Try to recursively combine into each shuffle source op. Variable masks
  // are only allowed for an op whose every user is part of this combine
  // chain (single use, or all users already combined) - otherwise we could
  // duplicate work/constant-pool entries for the other users.
  // Don't recurse if we already have more source ops than we can combine in
  // the remaining recursion depth.
  if (Ops.size() < (MaxDepth - Depth)) {
    for (int i = 0, e = Ops.size(); i < e; ++i) {
      // For empty roots, re-resolve zeroable elements before recursing so the
      // child sees the sentinels.
      SmallVector<int, 64> ResolvedMask = Mask;
      if (EmptyRoot)
        resolveTargetShuffleFromZeroables(ResolvedMask, OpUndef, OpZero);
      bool AllowCrossLaneVar = false;
      bool AllowPerLaneVar = false;
      if (Ops[i].getNode()->hasOneUse() ||
          SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) {
        AllowCrossLaneVar = AllowVariableCrossLaneMask;
        AllowPerLaneVar = AllowVariablePerLaneMask;
      }
      if (SDValue Res = combineX86ShufflesRecursively(
              Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
              HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG,
              Subtarget))
        return Res;
    }
  }

  // Attempt to constant fold all of the constant source ops.
  if (SDValue Cst = combineX86ShufflesConstants(
          Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
    return Cst;

  // If constant folding failed but every remaining op is still a constant,
  // there's nothing useful left to do at the top level - bail rather than
  // build a shuffle of constants.
  if (Depth == 0 && llvm::all_of(Ops, [&](SDValue Op) {
        APInt UndefElts;
        SmallVector<APInt> RawBits;
        unsigned EltSizeInBits = RootSizeInBits / Mask.size();
        return getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
                                             RawBits);
      })) {
    return SDValue();
  }

  // Canonicalize the combined shuffle mask chain with horizontal ops.
  // NOTE: This may update Ops and Mask.
  if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
          Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
    return DAG.getBitcast(Root.getValueType(), HOp);

  // Widen any subvector shuffle inputs we've collected to root size.
  if (any_of(Ops, [RootSizeInBits](SDValue Op) {
        return Op.getValueSizeInBits() < RootSizeInBits;
      })) {
    for (SDValue &Op : Ops)
      if (Op.getValueSizeInBits() < RootSizeInBits)
        Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op),
                            RootSizeInBits);
    // Re-resolve - widening may have created repeated sources.
    resolveTargetShuffleInputsAndMask(Ops, Mask);
  }

  // We can only combine unary and binary shuffle mask cases.
  if (Ops.size() <= 2) {
    // Minor canonicalization: repeatedly widen the mask (merging sequential
    // element pairs) down to the minimal-width mask that performs an
    // equivalent shuffle - this makes matching below easier.
    while (Mask.size() > 1) {
      SmallVector<int, 64> WidenedMask;
      if (!canWidenShuffleElements(Mask, WidenedMask))
        break;
      Mask = std::move(WidenedMask);
    }

    // Canonicalize binary shuffles by commuting the inputs when that yields
    // a preferred mask form.
    if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
      ShuffleVectorSDNode::commuteMask(Mask);
      std::swap(Ops[0], Ops[1]);
    }

    // Finally, try to combine into a single shuffle instruction.
    return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
                                  AllowVariableCrossLaneMask,
                                  AllowVariablePerLaneMask, DAG, Subtarget);
  }

  // More than 2 inputs - try combining as a shuffle of extracted subvectors
  // at the wider type.
  return combineX86ShuffleChainWithExtract(
      Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
      AllowVariablePerLaneMask, DAG, Subtarget);
}
37898 | |
37899 | |
37900 | static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, |
37901 | const X86Subtarget &Subtarget) { |
37902 | return combineX86ShufflesRecursively( |
37903 | {Op}, 0, Op, {0}, {}, 0, X86::MaxShuffleCombineDepth, |
37904 | false, |
37905 | true, true, DAG, |
37906 | Subtarget); |
37907 | } |
37908 | |
37909 | |
37910 | |
37911 | |
37912 | |
/// Get the PSHUF-style 4-element mask from a PSHUFD/PSHUFLW/PSHUFHW node.
///
/// Normalizes the extracted target shuffle mask to 4 elements indexed from 0
/// so callers can treat all three opcodes uniformly.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
  MVT VT = N.getSimpleValueType();
  SmallVector<int, 4> Mask;
  SmallVector<SDValue, 2> Ops;
  bool HaveMask =
      getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask);
  (void)HaveMask;
  assert(HaveMask);

  // If we have more than 128-bits, only the low 128-bits of the shuffle mask
  // matter - assert (in debug builds) that the upper lanes repeat it, then
  // keep just the low lane.
  if (VT.getSizeInBits() > 128) {
    int LaneElts = 128 / VT.getScalarSizeInBits();
#ifndef NDEBUG
    for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
      for (int j = 0; j < LaneElts; ++j)
        assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
               "Mask doesn't repeat in high 128-bit lanes!");
#endif
    Mask.resize(LaneElts);
  }

  switch (N.getOpcode()) {
  case X86ISD::PSHUFD:
    return Mask;
  case X86ISD::PSHUFLW:
    // Only the low 4 elements are shuffled - keep those.
    Mask.resize(4);
    return Mask;
  case X86ISD::PSHUFHW:
    // Only the high 4 elements are shuffled - drop the low half and rebase
    // the indices to start at 0.
    Mask.erase(Mask.begin(), Mask.begin() + 4);
    for (int &M : Mask)
      M -= 4;
    return Mask;
  default:
    llvm_unreachable("No valid shuffle instruction found!");
  }
}
37950 | |
37951 | |
37952 | |
37953 | |
37954 | |
37955 | |
/// Search for a combinable shuffle across a chain ending in a PSHUFD.
///
/// Walks up the (single-use) operand chain from \p N, skipping shuffles that
/// provably don't interfere with the dword mask, looking for another shuffle
/// whose mask can be merged into \p Mask. On success returns the replacement
/// value for N; otherwise SDValue().
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
                             SelectionDAG &DAG) {
  assert(N.getOpcode() == X86ISD::PSHUFD &&
         "Called with something other than an x86 128-bit half shuffle!");
  SDLoc DL(N);

  // Walk up a single-use chain looking for a combinable shuffle. Keep a stack
  // of the intervening shuffles so the chain can be rebuilt afterwards.
  SmallVector<SDValue, 8> Chain;
  SDValue V = N.getOperand(0);
  for (; V.hasOneUse(); V = V.getOperand(0)) {
    switch (V.getOpcode()) {
    default:
      // Nothing combinable found.
      return SDValue();

    case ISD::BITCAST:
      // Skip bitcasts - the target shuffle opcodes fix their own types.
      continue;

    case X86ISD::PSHUFD:
      // Found a dword shuffle we can merge with - exit the walk.
      break;

    case X86ISD::PSHUFLW:
      // Safe to skip only if the dword mask keeps the low half in place
      // (elements 0,1 identity) and the high half self-contained.
      if (Mask[0] != 0 || Mask[1] != 1 ||
          !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
        return SDValue();

      Chain.push_back(V);
      continue;

    case X86ISD::PSHUFHW:
      // Mirror of the PSHUFLW case: high half identity, low half
      // self-contained.
      if (Mask[2] != 2 || Mask[3] != 3 ||
          !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
        return SDValue();

      Chain.push_back(V);
      continue;

    case X86ISD::UNPCKL:
    case X86ISD::UNPCKH:
      // Only byte/word unpacks are handled here - wider element unpacks
      // don't interact with the half-shuffle search below.
      if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
          V.getSimpleValueType().getVectorElementType() != MVT::i16)
        return SDValue();

      // A same-operand unpack duplicates one half; look through it for a
      // half-word shuffle of the matching kind (LW for UNPCKL, HW for
      // UNPCKH) to merge with.
      unsigned CombineOp =
          V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
      if (V.getOperand(0) != V.getOperand(1) ||
          !V->isOnlyUserOf(V.getOperand(0).getNode()))
        return SDValue();
      Chain.push_back(V);
      V = V.getOperand(0);
      do {
        switch (V.getOpcode()) {
        default:
          // Nothing combinable below the unpack.
          return SDValue();

        case X86ISD::PSHUFLW:
        case X86ISD::PSHUFHW:
          // Found the matching half shuffle - stop here.
          if (V.getOpcode() == CombineOp)
            break;

          // A non-matching half shuffle must be rebuilt on top later.
          Chain.push_back(V);

          LLVM_FALLTHROUGH;
        case ISD::BITCAST:
          V = V.getOperand(0);
          continue;
        }
        break;
      } while (V.hasOneUse());
      break;
    }
    // Break out of the for-loop once the switch found a combinable shuffle.
    break;
  }

  if (!V.hasOneUse())
    // We fell out of the loop without finding a viable combining instruction.
    return SDValue();

  // Merge the found shuffle's mask into ours and rebuild V with the combined
  // mask.
  SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
  for (int &M : Mask)
    M = VMask[M];
  V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
                  getV4X86ShuffleImm8ForMask(Mask, DL, DAG));

  // Rebuild the skipped chain (in original order) on top of the new shuffle.
  while (!Chain.empty()) {
    SDValue W = Chain.pop_back_val();

    if (V.getValueType() != W.getOperand(0).getValueType())
      V = DAG.getBitcast(W.getOperand(0).getValueType(), V);

    switch (W.getOpcode()) {
    default:
      llvm_unreachable("Only PSHUF and UNPCK instructions get here!");

    case X86ISD::UNPCKL:
    case X86ISD::UNPCKH:
      // The unpack had identical operands - feed the new value to both.
      V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
      break;

    case X86ISD::PSHUFD:
    case X86ISD::PSHUFLW:
    case X86ISD::PSHUFHW:
      // Reapply the shuffle with its original immediate.
      V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
      break;
    }
  }
  if (V.getValueType() != N.getValueType())
    V = DAG.getBitcast(N.getValueType(), V);

  // Return the new chain to replace N.
  return V;
}
38083 | |
38084 | |
38085 | |
// Attempt to commute a SHUFP whose LHS is a foldable load:
// OP(SHUFP(LOAD,Y)) -> OP'(SHUFP(Y,LOAD)) so the load ends up as the second
// (memory-foldable) operand, fixing up the affected shuffle immediates.
static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
                                      SelectionDAG &DAG) {
  // Only vXf32 SHUFPS is handled here.
  if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32)
    return SDValue();

  // SHUFP(LHS,RHS) -> SHUFP(RHS,LHS), but only when LHS is a foldable load
  // and RHS is not (otherwise commuting gains nothing or loses a fold).
  auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) {
    if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode()))
      return SDValue();
    SDValue N0 = V.getOperand(0);
    SDValue N1 = V.getOperand(1);
    unsigned Imm = V.getConstantOperandVal(2);
    if (!MayFoldLoad(peekThroughOneUseBitcasts(N0)) ||
        MayFoldLoad(peekThroughOneUseBitcasts(N1)))
      return SDValue();
    // Swap the low/high 4-bit selector halves to match the swapped operands.
    Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
    return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
                       DAG.getTargetConstant(Imm, DL, MVT::i8));
  };

  switch (N.getOpcode()) {
  case X86ISD::VPERMILPI:
    if (SDValue NewSHUFP = commuteSHUFP(N, N.getOperand(0))) {
      unsigned Imm = N.getConstantOperandVal(1);
      // XOR 0xAA flips bit 1 of each 2-bit element selector, compensating
      // for the commuted SHUFP source halves.
      return DAG.getNode(X86ISD::VPERMILPI, DL, VT, NewSHUFP,
                         DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
    }
    break;
  case X86ISD::SHUFP: {
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned Imm = N.getConstantOperandVal(2);
    if (N0 == N1) {
      // Both operands are the same commuted SHUFP - adjust all selectors.
      if (SDValue NewSHUFP = commuteSHUFP(N, N0))
        return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, NewSHUFP,
                           DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
    } else if (SDValue NewSHUFP = commuteSHUFP(N, N0)) {
      // Only the low-half selectors read from the commuted operand.
      return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, N1,
                         DAG.getTargetConstant(Imm ^ 0x0A, DL, MVT::i8));
    } else if (SDValue NewSHUFP = commuteSHUFP(N, N1)) {
      // Only the high-half selectors read from the commuted operand.
      return DAG.getNode(X86ISD::SHUFP, DL, VT, N0, NewSHUFP,
                         DAG.getTargetConstant(Imm ^ 0xA0, DL, MVT::i8));
    }
    break;
  }
  }

  return SDValue();
}
38136 | |
38137 | |
/// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
/// Pushing the shuffle through lets it constant fold into constant operands
/// or merge with an existing shuffle feeding the binop.
static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
                                             const SDLoc &DL) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT ShuffleVT = N.getValueType();

  // An operand is worth shuffling ahead of the binop if shuffling it is free:
  // any constant build vector (shuffle constant folds away) or another
  // single-use target shuffle (the shuffles may merge).
  auto IsMergeableWithShuffle = [](SDValue Op) {
    return ISD::isBuildVectorAllOnes(Op.getNode()) ||
           ISD::isBuildVectorAllZeros(Op.getNode()) ||
           ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
           (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse());
  };
  // Moving the shuffle across the binop is safe for bitwise logic ops, or
  // when the binop's elements are no wider than the shuffle's (so element
  // boundaries are preserved).
  auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
    return BinOp == ISD::AND || BinOp == ISD::OR || BinOp == ISD::XOR ||
           (Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits());
  };

  unsigned Opc = N.getOpcode();
  switch (Opc) {
  // Unary and unary+immediate shuffles.
  case X86ISD::PSHUFB: {
    // Only handle PSHUFB if its shuffle mask can be determined.
    SmallVector<int> Mask;
    SmallVector<SDValue> Ops;
    if (!getTargetShuffleMask(N.getNode(), ShuffleVT.getSimpleVT(), false, Ops,
                              Mask))
      break;
    LLVM_FALLTHROUGH;
  }
  case X86ISD::VBROADCAST:
  case X86ISD::MOVDDUP:
  case X86ISD::PSHUFD:
  case X86ISD::VPERMI:
  case X86ISD::VPERMILPI: {
    if (N.getOperand(0).getValueType() == ShuffleVT &&
        N->isOnlyUserOf(N.getOperand(0).getNode())) {
      SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
      unsigned SrcOpcode = N0.getOpcode();
      if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
        SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
        SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
        // Fold if at least one binop operand absorbs the shuffle.
        if (IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op01)) {
          SDValue LHS, RHS;
          Op00 = DAG.getBitcast(ShuffleVT, Op00);
          Op01 = DAG.getBitcast(ShuffleVT, Op01);
          // Re-create the shuffle on each operand, forwarding the immediate
          // control operand when the opcode has one.
          if (N.getNumOperands() == 2) {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
          } else {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00);
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01);
          }
          EVT OpVT = N0.getValueType();
          return DAG.getBitcast(ShuffleVT,
                                DAG.getNode(SrcOpcode, DL, OpVT,
                                            DAG.getBitcast(OpVT, LHS),
                                            DAG.getBitcast(OpVT, RHS)));
        }
      }
    }
    break;
  }
  // Binary and binary+immediate shuffles.
  case X86ISD::INSERTPS: {
    // Don't fold INSERTPS that zeroes elements - the zeroing wouldn't
    // survive being pushed through the binop.
    unsigned InsertPSMask = N.getConstantOperandVal(2);
    unsigned ZeroMask = InsertPSMask & 0xF;
    if (ZeroMask != 0)
      break;
    LLVM_FALLTHROUGH;
  }
  case X86ISD::MOVSD:
  case X86ISD::MOVSS:
  case X86ISD::BLENDI:
  case X86ISD::SHUFP:
  case X86ISD::UNPCKH:
  case X86ISD::UNPCKL: {
    if (N->isOnlyUserOf(N.getOperand(0).getNode()) &&
        N->isOnlyUserOf(N.getOperand(1).getNode())) {
      SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
      SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
      unsigned SrcOpcode = N0.getOpcode();
      // Both shuffle operands must be the same kind of binop and safe to
      // move the shuffle across.
      if (TLI.isBinOp(SrcOpcode) && N1.getOpcode() == SrcOpcode &&
          IsSafeToMoveShuffle(N0, SrcOpcode) &&
          IsSafeToMoveShuffle(N1, SrcOpcode)) {
        SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
        SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
        SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
        SDValue Op11 = peekThroughOneUseBitcasts(N1.getOperand(1));
        // Ensure the total number of shuffles doesn't increase: both new
        // shuffles must each have at least one mergeable input.
        if (((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) ||
             (IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) ||
            ((IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op10)) &&
             (IsMergeableWithShuffle(Op01) || IsMergeableWithShuffle(Op11)))) {
          SDValue LHS, RHS;
          Op00 = DAG.getBitcast(ShuffleVT, Op00);
          Op10 = DAG.getBitcast(ShuffleVT, Op10);
          Op01 = DAG.getBitcast(ShuffleVT, Op01);
          Op11 = DAG.getBitcast(ShuffleVT, Op11);
          // Shuffle the paired operands, forwarding the immediate control
          // operand when the opcode has one.
          if (N.getNumOperands() == 3) {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11, N.getOperand(2));
          } else {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10);
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11);
          }
          EVT OpVT = N0.getValueType();
          return DAG.getBitcast(ShuffleVT,
                                DAG.getNode(SrcOpcode, DL, OpVT,
                                            DAG.getBitcast(OpVT, LHS),
                                            DAG.getBitcast(OpVT, RHS)));
        }
      }
    }
    break;
  }
  }
  return SDValue();
}
38264 | |
38265 | |
/// Attempt to fold vperm2x128(op(x),op(y)) -> op(vperm2x128(x,y)) for ops
/// that behave identically in every 128-bit lane (or that carry a shared
/// immediate), so the lane shuffle is pushed below the op where it may
/// combine further.
static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
                                                      SelectionDAG &DAG,
                                                      const SDLoc &DL) {
  assert(V.getOpcode() == X86ISD::VPERM2X128 && "Unknown lane shuffle");

  MVT VT = V.getSimpleValueType();
  SDValue Src0 = peekThroughBitcasts(V.getOperand(0));
  SDValue Src1 = peekThroughBitcasts(V.getOperand(1));
  unsigned SrcOpc0 = Src0.getOpcode();
  unsigned SrcOpc1 = Src1.getOpcode();
  EVT SrcVT0 = Src0.getValueType();
  EVT SrcVT1 = Src1.getValueType();

  // Both sources must be the same opcode on the same type, unless the second
  // source is entirely undef.
  if (!Src1.isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
    return SDValue();

  switch (SrcOpc0) {
  case X86ISD::MOVDDUP: {
    // MOVDDUP duplicates within each 128-bit lane, so it commutes with the
    // lane shuffle unconditionally.
    SDValue LHS = Src0.getOperand(0);
    SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
    SDValue Res =
        DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS, V.getOperand(2));
    Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res);
    return DAG.getBitcast(VT, Res);
  }
  case X86ISD::VPERMILPI:
    // VPERMILPI only commutes with the lane shuffle if its per-lane mask is
    // the same in both 128-bit lanes (low 2 bits vs next 2 bits for v4f64).
    if (SrcVT0 == MVT::v4f64) {
      uint64_t Mask = Src0.getConstantOperandVal(1);
      if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
        break;
    }
    LLVM_FALLTHROUGH;
  case X86ISD::VSHLI:
  case X86ISD::VSRLI:
  case X86ISD::VSRAI:
  case X86ISD::PSHUFD:
    // These ops are lane-local; hoist the shuffle if both sources share the
    // same immediate/shift-amount operand (or Src1 is undef).
    if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
      SDValue LHS = Src0.getOperand(0);
      SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
      SDValue Res = DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS,
                                V.getOperand(2));
      Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res, Src0.getOperand(1));
      return DAG.getBitcast(VT, Res);
    }
    break;
  }

  return SDValue();
}
38316 | |
38317 | |
/// Try to combine x86 target-specific shuffle nodes.
/// Returns a replacement value, or N itself when the combine has already been
/// committed via DCI.CombineTo (so the caller must not revisit N), or an
/// empty SDValue when nothing matched.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
  SDLoc DL(N);
  MVT VT = N.getSimpleValueType();
  SmallVector<int, 4> Mask;
  unsigned Opcode = N.getOpcode();

  if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
    return R;

  if (SDValue R = canonicalizeShuffleWithBinOps(N, DAG, DL))
    return R;

  // Handle specific target shuffles.
  switch (Opcode) {
  case X86ISD::MOVDDUP: {
    SDValue Src = N.getOperand(0);
    // Turn a 128-bit MOVDDUP of a full vector load into movddup+vzload.
    if (VT == MVT::v2f64 && Src.hasOneUse() &&
        ISD::isNormalLoad(Src.getNode())) {
      LoadSDNode *LN = cast<LoadSDNode>(Src);
      if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::f64, MVT::v2f64, DAG)) {
        SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad);
        DCI.CombineTo(N.getNode(), Movddup);
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return N; // Return N so it doesn't get rechecked!
      }
    }

    return SDValue();
  }
  case X86ISD::VBROADCAST: {
    SDValue Src = N.getOperand(0);
    SDValue BC = peekThroughBitcasts(Src);
    EVT SrcVT = Src.getValueType();
    EVT BCVT = BC.getValueType();

    // If broadcasting from another shuffle, attempt to simplify it.
    // NOTE(review): only the low Scale elements of the inner shuffle are
    // demanded since the broadcast replicates element 0 of the wider type.
    if (isTargetShuffle(BC.getOpcode()) &&
        VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
      unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
      SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(),
                                        SM_SentinelUndef);
      for (unsigned i = 0; i != Scale; ++i)
        DemandedMask[i] = i;
      if (SDValue Res = combineX86ShufflesRecursively(
              {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
              X86::MaxShuffleCombineDepth,
              /*HasVarMask*/ false, /*AllowCrossLaneVarMask*/ true,
              /*AllowPerLaneVarMask*/ true, DAG, Subtarget))
        return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                           DAG.getBitcast(SrcVT, Res));
    }

    // broadcast(bitcast(src)) -> bitcast(broadcast(src))
    // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
    if (Src.getOpcode() == ISD::BITCAST &&
        SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() &&
        DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
        FixedVectorType::isValidElementType(
            BCVT.getScalarType().getTypeForEVT(*DAG.getContext()))) {
      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
                                   VT.getVectorNumElements());
      return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
    }

    // Reduce broadcast source vector to lowest 128-bits.
    if (SrcVT.getSizeInBits() > 128)
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                         extract128BitVector(Src, 0, DAG, DL));

    // broadcast(scalar_to_vector(x)) -> broadcast(x).
    if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));

    // broadcast(extract_vector_elt(x, 0)) -> broadcast(x).
    if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        isNullConstant(Src.getOperand(1)) &&
        DAG.getTargetLoweringInfo().isTypeLegal(
            Src.getOperand(0).getValueType()))
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));

    // Share broadcast with the longest vector and extract low subvector
    // (which should be free). Ensure the same SDValue from the SDNode use is
    // being used.
    for (SDNode *User : Src->uses())
      if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
          Src == User->getOperand(0) &&
          User->getValueSizeInBits(0).getFixedSize() >
              VT.getFixedSizeInBits()) {
        return extractSubVector(SDValue(User, 0), 0, DAG, DL,
                                VT.getSizeInBits());
      }

    // vbroadcast(scalarload X) -> vbroadcast_load X
    // For float loads, extract other uses of the scalar from the broadcast.
    if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) &&
        ISD::isNormalLoad(Src.getNode())) {
      LoadSDNode *LN = cast<LoadSDNode>(Src);
      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
      SDValue BcastLd =
          DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
                                  LN->getMemoryVT(), LN->getMemOperand());
      // If the load has other (scalar) uses, re-extract element 0 from the
      // broadcast instead of keeping the original load alive.
      bool NoReplaceExtract = Src.hasOneUse();
      DCI.CombineTo(N.getNode(), BcastLd);
      if (NoReplaceExtract) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
      } else {
        SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd,
                                  DAG.getIntPtrConstant(0, DL));
        DCI.CombineTo(LN, Scl, BcastLd.getValue(1));
      }
      return N; // Return N so it doesn't get rechecked!
    }

    // Due to isTypeDesirableForOp, we won't always shrink a load truncated to
    // i16. So shrink it ourselves if we can make a broadcast_load.
    if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE &&
        Src.hasOneUse() && Src.getOperand(0).hasOneUse()) {
      assert(Subtarget.hasAVX2() && "Expected AVX2");
      SDValue TruncIn = Src.getOperand(0);

      // If this is a truncate of a non extending load we can just narrow it to
      // use a broadcast_load.
      if (ISD::isNormalLoad(TruncIn.getNode())) {
        LoadSDNode *LN = cast<LoadSDNode>(TruncIn);
        // Unless its volatile or atomic.
        if (LN->isSimple()) {
          SDVTList Tys = DAG.getVTList(VT, MVT::Other);
          SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
          SDValue BcastLd = DAG.getMemIntrinsicNode(
              X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
              LN->getPointerInfo(), LN->getOriginalAlign(),
              LN->getMemOperand()->getFlags());
          DCI.CombineTo(N.getNode(), BcastLd);
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
          DCI.recursivelyDeleteUnusedNodes(Src.getNode());
          return N; // Return N so it doesn't get rechecked!
        }
      }

      // If this is a truncate of an i16 extload, we can directly replace it.
      if (ISD::isUNINDEXEDLoad(Src.getOperand(0).getNode()) &&
          ISD::isEXTLoad(Src.getOperand(0).getNode())) {
        LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
        if (LN->getMemoryVT().getSizeInBits() == 16) {
          SDVTList Tys = DAG.getVTList(VT, MVT::Other);
          SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
          SDValue BcastLd =
              DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
                                      LN->getMemoryVT(), LN->getMemOperand());
          DCI.CombineTo(N.getNode(), BcastLd);
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
          DCI.recursivelyDeleteUnusedNodes(Src.getNode());
          return N; // Return N so it doesn't get rechecked!
        }
      }

      // If this is a truncate of load that has been shifted right, we can
      // offset the pointer and use a narrower load.
      if (TruncIn.getOpcode() == ISD::SRL &&
          TruncIn.getOperand(0).hasOneUse() &&
          isa<ConstantSDNode>(TruncIn.getOperand(1)) &&
          ISD::isNormalLoad(TruncIn.getOperand(0).getNode())) {
        LoadSDNode *LN = cast<LoadSDNode>(TruncIn.getOperand(0));
        unsigned ShiftAmt = TruncIn.getConstantOperandVal(1);
        // Make sure the shift amount and the load size are divisible by 16.
        // Don't do this if the load is volatile or atomic.
        if (ShiftAmt % 16 == 0 && TruncIn.getValueSizeInBits() % 16 == 0 &&
            LN->isSimple()) {
          unsigned Offset = ShiftAmt / 8;
          SDVTList Tys = DAG.getVTList(VT, MVT::Other);
          SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(),
                                                 TypeSize::Fixed(Offset), DL);
          SDValue Ops[] = { LN->getChain(), Ptr };
          SDValue BcastLd = DAG.getMemIntrinsicNode(
              X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
              LN->getPointerInfo().getWithOffset(Offset),
              LN->getOriginalAlign(),
              LN->getMemOperand()->getFlags());
          DCI.CombineTo(N.getNode(), BcastLd);
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
          DCI.recursivelyDeleteUnusedNodes(Src.getNode());
          return N; // Return N so it doesn't get rechecked!
        }
      }
    }

    // vbroadcast(vzload X) -> vbroadcast_load X
    if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
      MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
      if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
        SDValue BcastLd =
            DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
                                    LN->getMemoryVT(), LN->getMemOperand());
        DCI.CombineTo(N.getNode(), BcastLd);
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return N; // Return N so it doesn't get rechecked!
      }
    }

    // vbroadcast(vector load X) -> vbroadcast_load
    if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 ||
         SrcVT == MVT::v4i32) &&
        Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) {
      LoadSDNode *LN = cast<LoadSDNode>(Src);
      // Unless the load is volatile or atomic.
      if (LN->isSimple()) {
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
        SDValue BcastLd = DAG.getMemIntrinsicNode(
            X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SrcVT.getScalarType(),
            LN->getPointerInfo(), LN->getOriginalAlign(),
            LN->getMemOperand()->getFlags());
        DCI.CombineTo(N.getNode(), BcastLd);
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return N; // Return N so it doesn't get rechecked!
      }
    }

    return SDValue();
  }
  case X86ISD::VZEXT_MOVL: {
    SDValue N0 = N.getOperand(0);

    // If this a vzmovl of a full vector load, replace it with a vzload, unless
    // the load is volatile.
    if (N0.hasOneUse() && ISD::isNormalLoad(N0.getNode())) {
      auto *LN = cast<LoadSDNode>(N0);
      if (SDValue VZLoad =
              narrowLoadToVZLoad(LN, VT.getVectorElementType(), VT, DAG)) {
        DCI.CombineTo(N.getNode(), VZLoad);
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return N; // Return N so it doesn't get rechecked!
      }
    }

    // If this a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast
    // and can just use a VZEXT_LOAD.
    if (N0.hasOneUse() && N0.getOpcode() == X86ISD::VBROADCAST_LOAD) {
      auto *LN = cast<MemSDNode>(N0);
      if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
        SDValue VZLoad =
            DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
                                    LN->getMemoryVT(), LN->getMemOperand());
        DCI.CombineTo(N.getNode(), VZLoad);
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return N; // Return N so it doesn't get rechecked!
      }
    }

    // Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into
    // (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X)))))))
    // if the upper bits of the i64 are known zero.
    if (N0.hasOneUse() && N0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        N0.getOperand(0).hasOneUse() &&
        N0.getOperand(0).getValueType() == MVT::i64) {
      SDValue In = N0.getOperand(0);
      APInt Mask = APInt::getHighBitsSet(64, 32);
      if (DAG.MaskedValueIsZero(In, Mask)) {
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In);
        MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
        SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc);
        SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec);
        return DAG.getBitcast(VT, Movl);
      }
    }

    // Load a scalar integer constant directly to XMM instead of transferring
    // an immediate value from GPR.
    // vzext_movl (scalar_to_vector C) --> load [C,0...]
    if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR) {
      if (auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
        // Create a vector constant - scalar constant followed by zeros.
        EVT ScalarVT = N0.getOperand(0).getValueType();
        Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext());
        unsigned NumElts = VT.getVectorNumElements();
        Constant *Zero = ConstantInt::getNullValue(ScalarTy);
        SmallVector<Constant *, 32> ConstantVec(NumElts, Zero);
        ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue());

        // Load the vector constant from the constant pool.
        MVT PVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
        SDValue CP = DAG.getConstantPool(ConstantVector::get(ConstantVec), PVT);
        MachinePointerInfo MPI =
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
        Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
        return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment,
                           MachineMemOperand::MOLoad);
      }
    }

    // Pull subvector inserts into undef through VZEXT_MOVL by making it an
    // insert into a zero vector. This helps get VZEXT_MOVL closer to
    // scalar_to_vectors where 256/512 are canonicalized to an insert and a
    // 128-bit scalar_to_vector. This reduces the number of isel patterns.
    if (!DCI.isBeforeLegalizeOps() && N0.hasOneUse()) {
      SDValue V = peekThroughOneUseBitcasts(N0);

      if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() &&
          isNullConstant(V.getOperand(2))) {
        SDValue In = V.getOperand(1);
        MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     In.getValueSizeInBits() /
                                         VT.getScalarSizeInBits());
        In = DAG.getBitcast(SubVT, In);
        SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, SubVT, In);
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                           getZeroVector(VT, Subtarget, DAG, DL), Movl,
                           V.getOperand(2));
      }
    }

    return SDValue();
  }
  case X86ISD::BLENDI: {
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);

    // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.
    // TODO: Handle MVT::v16i16 repeated blend mask.
    if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
      MVT SrcVT = N0.getOperand(0).getSimpleValueType();
      if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
          SrcVT.getScalarSizeInBits() >= 32) {
        unsigned BlendMask = N.getConstantOperandVal(2);
        unsigned Size = VT.getVectorNumElements();
        unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
        BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale);
        return DAG.getBitcast(
            VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
                            N1.getOperand(0),
                            DAG.getTargetConstant(BlendMask, DL, MVT::i8)));
      }
    }
    return SDValue();
  }
  case X86ISD::VPERMI: {
    // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.
    // TODO: Remove when we have preferred shuffles handling this.
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned EltSizeInBits = VT.getScalarSizeInBits();
    if (N0.getOpcode() == ISD::BITCAST &&
        N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) {
      SDValue Src = N0.getOperand(0);
      EVT SrcVT = Src.getValueType();
      SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1);
      return DAG.getBitcast(VT, Res);
    }
    return SDValue();
  }
  case X86ISD::VPERM2X128: {
    // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    if (LHS.getOpcode() == ISD::BITCAST &&
        (RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) {
      EVT SrcVT = LHS.getOperand(0).getValueType();
      if (RHS.isUndef() || SrcVT == RHS.getOperand(0).getValueType()) {
        return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT,
                                              DAG.getBitcast(SrcVT, LHS),
                                              DAG.getBitcast(SrcVT, RHS),
                                              N->getOperand(2)));
      }
    }

    // Fold vperm2x128(op(),op()) -> op(vperm2x128(),vperm2x128()).
    if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
      return Res;

    // Fold vperm2x128 into a concat of the 128-bit subvectors it selects,
    // looking through concat/insert_subvector patterns on either source.
    // Idx encodes the selected lane: 0/1 from operand 0, 2/3 from operand 1.
    auto FindSubVector128 = [&](unsigned Idx) {
      if (Idx > 3)
        return SDValue(); // Lane is zeroed (bit 3 of the selector set).
      SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1));
      SmallVector<SDValue> SubOps;
      if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2)
        return SubOps[Idx & 1];
      unsigned NumElts = Src.getValueType().getVectorNumElements();
      if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
          Src.getOperand(1).getValueSizeInBits() == 128 &&
          Src.getConstantOperandAPInt(2) == (NumElts / 2)) {
        return Src.getOperand(1);
      }
      return SDValue();
    };
    unsigned Imm = N.getConstantOperandVal(2);
    if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
      if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
        MVT SubVT = VT.getHalfNumVectorElementsVT();
        SubLo = DAG.getBitcast(SubVT, SubLo);
        SubHi = DAG.getBitcast(SubVT, SubHi);
        return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
      }
    }
    return SDValue();
  }
  case X86ISD::PSHUFD:
  case X86ISD::PSHUFLW:
  case X86ISD::PSHUFHW:
    // Collect the 4-element mask and fall through to the generic PSHUF
    // simplifications after the switch.
    Mask = getPSHUFShuffleMask(N);
    assert(Mask.size() == 4);
    break;
  case X86ISD::MOVSD:
  case X86ISD::MOVSH:
  case X86ISD::MOVSS: {
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);

    // Canonicalize scalar FPOps:
    // MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0])))
    // If commutable, allow OP(N1[0], N0[0]).
    unsigned Opcode1 = N1.getOpcode();
    if (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL || Opcode1 == ISD::FSUB ||
        Opcode1 == ISD::FDIV) {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      if (N10 == N0 ||
          (N11 == N0 && (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL))) {
        if (N10 != N0)
          std::swap(N10, N11);
        MVT SVT = VT.getVectorElementType();
        SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
        N10 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N10, ZeroIdx);
        N11 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N11, ZeroIdx);
        SDValue Scl = DAG.getNode(Opcode1, DL, SVT, N10, N11);
        SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
        return DAG.getNode(Opcode, DL, VT, N0, SclVec);
      }
    }

    return SDValue();
  }
  case X86ISD::INSERTPS: {
    assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);
    unsigned InsertPSMask = N.getConstantOperandVal(2);
    unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
    unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
    unsigned ZeroMask = InsertPSMask & 0xF;

    // If we zero out all elements from Op0 then we don't need to reference it.
    if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
      return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
                         DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));

    // If we zero out the element from Op1 then we don't need to reference it.
    if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
      return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
                         DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));

    // Attempt to merge insertps Op1 with an inner target shuffle node.
    SmallVector<int, 8> TargetMask1;
    SmallVector<SDValue, 2> Ops1;
    APInt KnownUndef1, KnownZero1;
    if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1,
                                     KnownZero1)) {
      if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) {
        // Zero/UNDEF insertion - zero out element and remove dependency.
        InsertPSMask |= (1u << DstIdx);
        return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
                           DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
      }
      // Update insertps mask srcidx and reference the source input directly.
      int M = TargetMask1[SrcIdx];
      assert(0 <= M && M < 8 && "Shuffle index out of range");
      InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
      Op1 = Ops1[M < 4 ? 0 : 1];
      return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
                         DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
    }

    // Attempt to merge insertps Op0 with an inner target shuffle node.
    SmallVector<int, 8> TargetMask0;
    SmallVector<SDValue, 2> Ops0;
    APInt KnownUndef0, KnownZero0;
    if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0,
                                     KnownZero0)) {
      bool Updated = false;
      bool UseInput00 = false;
      bool UseInput01 = false;
      for (int i = 0; i != 4; ++i) {
        if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
          // No change if element is already zero or the inserted element.
          continue;
        } else if (KnownUndef0[i] || KnownZero0[i]) {
          // If the target mask is undef/zero then we must zero the element.
          InsertPSMask |= (1u << i);
          Updated = true;
          continue;
        }

        // The input vector element must be inline (identity within a source).
        int M = TargetMask0[i];
        if (M != i && M != (i + 4))
          return SDValue();

        // Determine which inputs of the target shuffle we're using.
        UseInput00 |= (0 <= M && M < 4);
        UseInput01 |= (4 <= M);
      }

      // If we're not using both inputs of the target shuffle then use the
      // referenced input directly.
      if (UseInput00 && !UseInput01) {
        Updated = true;
        Op0 = Ops0[0];
      } else if (!UseInput00 && UseInput01) {
        Updated = true;
        Op0 = Ops0[1];
      }

      if (Updated)
        return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
                           DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
    }

    // If we're inserting an element from a vbroadcast load, fold the
    // load into the X86insertps instruction. We need to convert the scalar
    // load to a vector and clear the source lane of the INSERTPS control.
    if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) {
      auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
      if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
        SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
                                   MemIntr->getBasePtr(),
                                   MemIntr->getMemOperand());
        SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
                                     DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
                                                 Load),
                                     DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
        DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
        return Insert;
      }
    }

    return SDValue();
  }
  default:
    return SDValue();
  }

  // Nuke no-op shuffles that show up after combining.
  if (isNoopShuffleMask(Mask))
    return N.getOperand(0);

  // Look for simplifications involving one or two shuffle instructions.
  SDValue V = N.getOperand(0);
  switch (N.getOpcode()) {
  default:
    break;
  case X86ISD::PSHUFLW:
  case X86ISD::PSHUFHW:
    assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");

    // See if this reduces to a PSHUFD which is no more expensive and can
    // combine with more operations. Note that it has to at least flip the
    // dwords as otherwise it would have been removed as a no-op.
    if (makeArrayRef(Mask).equals({2, 3, 0, 1})) {
      int DMask[] = {0, 1, 2, 3};
      int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
      DMask[DOffset + 0] = DOffset + 1;
      DMask[DOffset + 1] = DOffset + 0;
      MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
      V = DAG.getBitcast(DVT, V);
      V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
                      getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
      return DAG.getBitcast(VT, V);
    }

    // Look for shuffle patterns which can be implemented as a single unpack.
    // FIXME: This doesn't handle the location of the PSHUFD generically, and
    // only works when we have a PSHUFD followed by the other half-shuffle.
    if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
        (V.getOpcode() == X86ISD::PSHUFLW ||
         V.getOpcode() == X86ISD::PSHUFHW) &&
        V.getOpcode() != N.getOpcode() &&
        V.hasOneUse() && V.getOperand(0).hasOneUse()) {
      SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
      if (D.getOpcode() == X86ISD::PSHUFD) {
        // Compose the word mask of both half shuffles.
        SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
        SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
        int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
        int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
        int WordMask[8];
        for (int i = 0; i < 4; ++i) {
          WordMask[i + NOffset] = Mask[i] + NOffset;
          WordMask[i + VOffset] = VMask[i] + VOffset;
        }
        // Map the word mask through the DWord mask of the PSHUFD.
        int MappedMask[8];
        for (int i = 0; i < 8; ++i)
          MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
        if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
            makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
          // We can replace all three shuffles with an unpack.
          V = DAG.getBitcast(VT, D.getOperand(0));
          return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
                                                : X86ISD::UNPCKH,
                             DL, VT, V, V);
        }
      }
    }

    break;

  case X86ISD::PSHUFD:
    if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG))
      return NewN;

    break;
  }

  return SDValue();
}
38950 | |
38951 | |
38952 | |
38953 | |
38954 | static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) { |
38955 | |
38956 | int ParitySrc[2] = {-1, -1}; |
38957 | unsigned Size = Mask.size(); |
38958 | for (unsigned i = 0; i != Size; ++i) { |
38959 | int M = Mask[i]; |
38960 | if (M < 0) |
38961 | continue; |
38962 | |
38963 | |
38964 | if ((M % Size) != i) |
38965 | return false; |
38966 | |
38967 | |
38968 | int Src = M / Size; |
38969 | if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src) |
38970 | return false; |
38971 | ParitySrc[i % 2] = Src; |
38972 | } |
38973 | |
38974 | |
38975 | if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1]) |
38976 | return false; |
38977 | |
38978 | Op0Even = ParitySrc[0] == 0; |
38979 | return true; |
38980 | } |
38981 | |
38982 | |
38983 | |
38984 | |
38985 | |
38986 | |
38987 | |
38988 | |
38989 | |
38990 | |
/// Return true if the shuffle node \p N blends an FADD and an FSUB of the
/// same two operands in the ADDSUB/SUBADD lane pattern. On success, \p Opnd0
/// and \p Opnd1 are the shared operands and \p IsSubAdd reports whether the
/// even lanes subtract (SUBADD) rather than add (ADDSUB).
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
                             bool &IsSubAdd) {
  // ADDSUB/SUBADD need SSE3 and a legal floating-point vector type.
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!Subtarget.hasSSE3() || !TLI.isTypeLegal(VT) ||
      !VT.getSimpleVT().isFloatingPoint())
    return false;

  // We only handle target-independent shuffles.
  // FIXME: It would be easy and harmless to use the target shuffle mask
  // extraction tool to support more.
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);

  // Make sure we have exactly one FADD and one FSUB (in either order).
  if ((V1.getOpcode() != ISD::FADD && V1.getOpcode() != ISD::FSUB) ||
      (V2.getOpcode() != ISD::FADD && V2.getOpcode() != ISD::FSUB) ||
      V1.getOpcode() == V2.getOpcode())
    return false;

  // If there are other uses of these operations we can't fold them.
  if (!V1->hasOneUse() || !V2->hasOneUse())
    return false;

  // Ensure that both operations have the same operands. Note that we can
  // commute the FADD operands but not the FSUB operands.
  SDValue LHS, RHS;
  if (V1.getOpcode() == ISD::FSUB) {
    LHS = V1->getOperand(0); RHS = V1->getOperand(1);
    if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
        (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
      return false;
  } else {
    assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode");
    LHS = V2->getOperand(0); RHS = V2->getOperand(1);
    if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&
        (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))
      return false;
  }

  // The mask must blend per-lane with distinct even/odd sources.
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
  bool Op0Even;
  if (!isAddSubOrSubAddMask(Mask, Op0Even))
    return false;

  // It's a subadd if the even lanes come from the FADD operation.
  IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
                     : V2->getOpcode() == ISD::FADD;

  Opnd0 = LHS;
  Opnd1 = RHS;
  return true;
}
39049 | |
39050 | |
/// Combine shuffle of two fma nodes into FMAddSub or FMSubAdd.
static SDValue combineShuffleToFMAddSub(SDNode *N,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  // We only handle target-independent shuffles.
  // FIXME: It would be easy and harmless to use the target shuffle mask
  // extraction tool to support more.
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();

  MVT VT = N->getSimpleValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT))
    return SDValue();

  // We're trying to match (shuffle fma(a,b,c), X86Fmsub(a,b,c)) in either
  // operand order.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue FMAdd = Op0, FMSub = Op1;
  if (FMSub.getOpcode() != X86ISD::FMSUB)
    std::swap(FMAdd, FMSub);

  // Both nodes must share all three operands and be single-use.
  if (FMAdd.getOpcode() != ISD::FMA || FMSub.getOpcode() != X86ISD::FMSUB ||
      FMAdd.getOperand(0) != FMSub.getOperand(0) || !FMAdd.hasOneUse() ||
      FMAdd.getOperand(1) != FMSub.getOperand(1) || !FMSub.hasOneUse() ||
      FMAdd.getOperand(2) != FMSub.getOperand(2))
    return SDValue();

  // Check for correct shuffle mask (even/odd lanes from distinct sources).
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
  bool Op0Even;
  if (!isAddSubOrSubAddMask(Mask, Op0Even))
    return SDValue();

  // FMAddSub takes zeroth operand from FMSub node (even lanes subtract for
  // FMSUBADD when the even lanes come from the FMA).
  SDLoc DL(N);
  bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd;
  unsigned Opcode = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
  return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1),
                     FMAdd.getOperand(2));
}
39091 | |
39092 | |
39093 | |
/// Try to combine a shuffle into a target-specific add-sub or
/// mul-add-sub node.
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
                                                const X86Subtarget &Subtarget,
                                                SelectionDAG &DAG) {
  // First try the FMA/FMSUB blend pattern.
  if (SDValue V = combineShuffleToFMAddSub(N, Subtarget, DAG))
    return V;

  SDValue Opnd0, Opnd1;
  bool IsSubAdd;
  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
    return SDValue();

  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // Try to generate X86ISD::FMADDSUB node here (if one of the add/sub
  // operands is itself a multiply that can be contracted).
  SDValue Opnd2;
  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
    unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
    return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
  }

  // There is no plain ADDSUB counterpart for SUBADD.
  if (IsSubAdd)
    return SDValue();

  // Do not generate X86ISD::ADDSUB node for 512bit types that are not
  // the original 256-bit and 128-bit forms - the combine to
  // blend-of-add-and-sub handles those.
  if (VT.is512BitVector())
    return SDValue();

  // Do not generate X86ISD::ADDSUB node for FP16's vector types even though
  // the ADDSUB idiom has been successfully recognized. There are no known
  // X86 targets with FP16 ADDSUB instructions!
  if (VT.getVectorElementType() == MVT::f16)
    return SDValue();

  return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
39132 | |
39133 | |
39134 | |
39135 | |
// We are looking for a shuffle where both sources are concatenated with undef
// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
// we can do this as a single shuffle:
//   (shuffle (concat X, undef), (concat Y, undef)) -->
//   (shuffle (concat X, Y), undef)
static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
                                           const X86Subtarget &Subtarget) {
  if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N))
    return SDValue();

  EVT VT = N->getValueType(0);

  // We only care about shuffles of 128/256-bit vectors of 32/64-bit values.
  if (!VT.is128BitVector() && !VT.is256BitVector())
    return SDValue();

  if (VT.getVectorElementType() != MVT::i32 &&
      VT.getVectorElementType() != MVT::i64 &&
      VT.getVectorElementType() != MVT::f32 &&
      VT.getVectorElementType() != MVT::f64)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Check that both sources are two-operand concats whose upper half is undef.
  if (N0.getOpcode() != ISD::CONCAT_VECTORS ||
      N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
      N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() ||
      !N1.getOperand(1).isUndef())
    return SDValue();

  // Construct the new shuffle mask. Elements from the first source stay where
  // they were; elements from the second source no longer need to skip over the
  // undef upper half of the first concat, so shift them down by NumElts / 2.
  SmallVector<int, 8> Mask;
  int NumElts = VT.getVectorNumElements();

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  for (int Elt : SVOp->getMask())
    Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));

  SDLoc DL(N);
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
                               N1.getOperand(0));
  return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
}
39177 | |
39178 | |
39179 | |
39180 | |
/// If we have a shuffle of AVX/AVX512 (256/512 bit) vectors that only uses the
/// low half of each source vector and does not set any high half elements in
/// the destination vector, narrow the shuffle to half-width vectors.
static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
  if (!Shuf->getValueType(0).isSimple())
    return SDValue();
  MVT VT = Shuf->getSimpleValueType(0);
  if (!VT.is256BitVector() && !VT.is512BitVector())
    return SDValue();

  // See if we can ignore all of the high elements of the shuffle.
  ArrayRef<int> Mask = Shuf->getMask();
  if (!isUndefUpperHalf(Mask))
    return SDValue();

  // Check if the shuffle mask accesses only the low half of each input vector
  // (half-index output is 0 or 2).
  int HalfIdx1, HalfIdx2;
  SmallVector<int, 8> HalfMask(Mask.size() / 2);
  if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2) ||
      (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
    return SDValue();

  // Create a half-width shuffle to replace the unnecessarily wide shuffle.
  // The trick is knowing that all of the insert/extract are actually free
  // subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle
  // of narrow inputs into a narrow output, and that is always cheaper than
  // the wide shuffle that we started with.
  return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
                               Shuf->getOperand(1), HalfMask, HalfIdx1,
                               HalfIdx2, false, DAG, /*UseConcat*/ true);
}
39210 | |
/// DAG-combine entry point for ISD::VECTOR_SHUFFLE and X86 target shuffles.
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
                              TargetLowering::DAGCombinerInfo &DCI,
                              const X86Subtarget &Subtarget) {
  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N))
    if (SDValue V = narrowShuffle(Shuf, DAG))
      return V;

  // If we have legalized the vector types, look for blends of FADD and FSUB
  // nodes that we can fuse into an ADDSUB, FMADDSUB or FMSUBADD node.
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isTypeLegal(VT))
    if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
      return AddSub;

  // Attempt to combine into a vector load/broadcast.
  if (SDValue LD = combineToConsecutiveLoads(
          VT, SDValue(N, 0), dl, DAG, Subtarget, /*IsAfterLegalize*/ true))
    return LD;

  // For AVX2, we sometimes want to combine
  //   (vector_shuffle <mask> (concat_vectors t1, undef)
  //                          (concat_vectors t2, undef))
  // Into:
  //   (vector_shuffle <mask> (concat_vectors t1, t2), undef)
  // Since the latter can be efficiently lowered with VPERMD/VPERMQ.
  if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
    return ShufConcat;

  if (isTargetShuffle(N->getOpcode())) {
    SDValue Op(N, 0);
    if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget))
      return Shuffle;

    // Try recursively combining arbitrary sequences of x86 shuffle
    // instructions into higher-order shuffles. We do this after combining
    // specific PSHUF instruction sequences into their minimal form so that we
    // can evaluate how many specialized shuffle instructions are involved in
    // a particular chain.
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;

    // Simplify source operands based on shuffle mask.
    // TODO - merge this into combineX86ShufflesRecursively.
    APInt KnownUndef, KnownZero;
    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
    if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                       DCI))
      return SDValue(N, 0);
  }

  return SDValue();
}
39265 | |
39266 | |
39267 | |
/// Attempt to simplify the variable shuffle-mask operand (at \p MaskIndex) of
/// a target shuffle, given the set of demanded result elements. Undemanded
/// lanes of a constant-pool mask are rewritten to undef so later combines can
/// take advantage.
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
    SDValue Op, const APInt &DemandedElts, unsigned MaskIndex,
    TargetLowering::TargetLoweringOpt &TLO, unsigned Depth) const {
  // If we're demanding all elements don't bother trying to simplify the mask.
  unsigned NumElts = DemandedElts.getBitWidth();
  if (DemandedElts.isAllOnesValue())
    return false;

  SDValue Mask = Op.getOperand(MaskIndex);
  if (!Mask.hasOneUse())
    return false;

  // Attempt to generically simplify the variable shuffle mask.
  APInt MaskUndef, MaskZero;
  if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
                                 Depth + 1))
    return true;

  // Attempt to extract+simplify a (constant pool load) shuffle mask.
  SDValue BC = peekThroughOneUseBitcasts(Mask);
  EVT BCVT = BC.getValueType();
  auto *Load = dyn_cast<LoadSDNode>(BC);
  if (!Load)
    return false;

  const Constant *C = getTargetConstantFromNode(Load);
  if (!C)
    return false;

  Type *CTy = C->getType();
  if (!CTy->isVectorTy() ||
      CTy->getPrimitiveSizeInBits() != Mask.getValueSizeInBits())
    return false;

  // The constant may have more (narrower) elements than the mask vector has
  // result elements; only a 1:1 or 2:1 scale is handled.
  unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements();
  if (NumCstElts != NumElts && NumCstElts != (NumElts * 2))
    return false;
  unsigned Scale = NumCstElts / NumElts;

  // Simplify mask if we have an undemanded element that is not undef.
  bool Simplified = false;
  SmallVector<Constant *, 32> ConstVecOps;
  for (unsigned i = 0; i != NumCstElts; ++i) {
    Constant *Elt = C->getAggregateElement(i);
    if (!DemandedElts[i / Scale] && !isa<UndefValue>(Elt)) {
      ConstVecOps.push_back(UndefValue::get(Elt->getType()));
      Simplified = true;
      continue;
    }
    ConstVecOps.push_back(Elt);
  }
  if (!Simplified)
    return false;

  // Generate new constant pool entry + legalize immediately for the load.
  SDLoc DL(Op);
  SDValue CV = TLO.DAG.getConstantPool(ConstantVector::get(ConstVecOps), BCVT);
  SDValue LegalCV = LowerConstantPool(CV, TLO.DAG);
  SDValue NewMask = TLO.DAG.getLoad(
      BCVT, DL, TLO.DAG.getEntryNode(), LegalCV,
      MachinePointerInfo::getConstantPool(TLO.DAG.getMachineFunction()),
      Load->getAlign());
  return TLO.CombineTo(Mask, TLO.DAG.getBitcast(Mask.getValueType(), NewMask));
}
39334 | |
/// Target hook: simplify \p Op given that only \p DemandedElts of its result
/// are used. Returns true if the DAG was changed (via TLO.CombineTo) or a
/// deeper simplification succeeded; on a false return, KnownUndef/KnownZero
/// report which result elements are known undef/zero.
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  int NumElts = DemandedElts.getBitWidth();
  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();

  // Handle special case opcodes.
  switch (Opc) {
  case X86ISD::PMULDQ:
  case X86ISD::PMULUDQ: {
    APInt LHSUndef, LHSZero;
    APInt RHSUndef, RHSZero;
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
                                   Depth + 1))
      return true;
    // Multiply by zero: an element is known zero if either source is.
    KnownZero = LHSZero | RHSZero;
    break;
  }
  case X86ISD::VSHL:
  case X86ISD::VSRL:
  case X86ISD::VSRA: {
    // We only need the bottom 64-bits of the (128-bit) shift amount.
    SDValue Amt = Op.getOperand(1);
    MVT AmtVT = Amt.getSimpleValueType();
    assert(AmtVT.is128BitVector() && "Unexpected value type");

    // If we reuse the shift amount just for the shift, we don't need the rest
    // of the amount vector - so we can treat it as single use if every user is
    // another variable shift that only reads it as its amount operand.
    bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) {
      unsigned UseOpc = Use->getOpcode();
      return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL ||
              UseOpc == X86ISD::VSRA) &&
             Use->getOperand(0) != Amt;
    });

    APInt AmtUndef, AmtZero;
    unsigned NumAmtElts = AmtVT.getVectorNumElements();
    APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
    if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
                                   Depth + 1, AssumeSingleUse))
      return true;
    LLVM_FALLTHROUGH;
  }
  case X86ISD::VSHLI:
  case X86ISD::VSRLI:
  case X86ISD::VSRAI: {
    SDValue Src = Op.getOperand(0);
    APInt SrcUndef;
    if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;

    // Aggressively peek through ops to get at the demanded elts.
    if (!DemandedElts.isAllOnesValue())
      if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
              Src, DemandedElts, TLO.DAG, Depth + 1))
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));
    break;
  }
  case X86ISD::KSHIFTL: {
    SDValue Src = Op.getOperand(0);
    auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
    assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
    unsigned ShiftAmt = Amt->getZExtValue();

    if (ShiftAmt == 0)
      return TLO.CombineTo(Op, Src);

    // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
    // single shift. We can do this if the bottom bits (which are shifted
    // out) are never demanded.
    if (Src.getOpcode() == X86ISD::KSHIFTR) {
      if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) {
        unsigned C1 = Src.getConstantOperandVal(1);
        unsigned NewOpc = X86ISD::KSHIFTL;
        int Diff = ShiftAmt - C1;
        if (Diff < 0) {
          Diff = -Diff;
          NewOpc = X86ISD::KSHIFTR;
        }

        SDLoc dl(Op);
        SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
      }
    }

    // Only the low (NumElts - ShiftAmt) source elements land in the result.
    APInt DemandedSrc = DemandedElts.lshr(ShiftAmt);
    if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;

    KnownUndef <<= ShiftAmt;
    KnownZero <<= ShiftAmt;
    KnownZero.setLowBits(ShiftAmt); // Shifted-in low elements are zero.
    break;
  }
  case X86ISD::KSHIFTR: {
    SDValue Src = Op.getOperand(0);
    auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
    assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
    unsigned ShiftAmt = Amt->getZExtValue();

    if (ShiftAmt == 0)
      return TLO.CombineTo(Op, Src);

    // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
    // single shift. We can do this if the top bits (which are shifted
    // out) are never demanded.
    if (Src.getOpcode() == X86ISD::KSHIFTL) {
      if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) {
        unsigned C1 = Src.getConstantOperandVal(1);
        unsigned NewOpc = X86ISD::KSHIFTR;
        int Diff = ShiftAmt - C1;
        if (Diff < 0) {
          Diff = -Diff;
          NewOpc = X86ISD::KSHIFTL;
        }

        SDLoc dl(Op);
        SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));
      }
    }

    // Only the high (NumElts - ShiftAmt) source elements land in the result.
    APInt DemandedSrc = DemandedElts.shl(ShiftAmt);
    if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;

    KnownUndef.lshrInPlace(ShiftAmt);
    KnownZero.lshrInPlace(ShiftAmt);
    KnownZero.setHighBits(ShiftAmt); // Shifted-in high elements are zero.
    break;
  }
  case X86ISD::CVTSI2P:
  case X86ISD::CVTUI2P: {
    // Source may have a different (wider/narrower) element count.
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    APInt SrcUndef, SrcZero;
    APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
    if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    break;
  }
  case X86ISD::PACKSS:
  case X86ISD::PACKUS: {
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);

    // Map demanded result elements back onto the two (per-lane interleaved)
    // pack sources.
    APInt DemandedLHS, DemandedRHS;
    getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);

    APInt LHSUndef, LHSZero;
    if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO,
                                   Depth + 1))
      return true;
    APInt RHSUndef, RHSZero;
    if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO,
                                   Depth + 1))
      return true;

    // TODO - pass on known zero/undef.

    // Aggressively peek through ops to get at the demanded elts.
    // TODO - we should do this for all target/faux shuffles ops.
    if (!DemandedElts.isAllOnesValue()) {
      SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
                                                            TLO.DAG, Depth + 1);
      SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
                                                            TLO.DAG, Depth + 1);
      if (NewN0 || NewN1) {
        NewN0 = NewN0 ? NewN0 : N0;
        NewN1 = NewN1 ? NewN1 : N1;
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
      }
    }
    break;
  }
  case X86ISD::HADD:
  case X86ISD::HSUB:
  case X86ISD::FHADD:
  case X86ISD::FHSUB: {
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);

    // Map demanded result elements back onto the two horizontal-op sources.
    APInt DemandedLHS, DemandedRHS;
    getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);

    APInt LHSUndef, LHSZero;
    if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO,
                                   Depth + 1))
      return true;
    APInt RHSUndef, RHSZero;
    if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO,
                                   Depth + 1))
      return true;

    // TODO - pass on known zero/undef.

    // Aggressively peek through ops to get at the demanded elts.
    // TODO: Handle repeated operands.
    if (N0 != N1 && !DemandedElts.isAllOnesValue()) {
      SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
                                                            TLO.DAG, Depth + 1);
      SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
                                                            TLO.DAG, Depth + 1);
      if (NewN0 || NewN1) {
        NewN0 = NewN0 ? NewN0 : N0;
        NewN1 = NewN1 ? NewN1 : N1;
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
      }
    }
    break;
  }
  case X86ISD::VTRUNC:
  case X86ISD::VTRUNCS:
  case X86ISD::VTRUNCUS: {
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrc, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);
    break;
  }
  case X86ISD::BLENDV: {
    // Operand 0 is the per-element selector; 1 and 2 are the data sources.
    APInt SelUndef, SelZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, SelUndef,
                                   SelZero, TLO, Depth + 1))
      return true;

    // TODO: Use SelZero to adjust LHS/RHS DemandedElts.
    APInt LHSUndef, LHSZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, LHSUndef,
                                   LHSZero, TLO, Depth + 1))
      return true;

    APInt RHSUndef, RHSZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedElts, RHSUndef,
                                   RHSZero, TLO, Depth + 1))
      return true;

    // A result element is known zero/undef only if both sources agree.
    KnownZero = LHSZero & RHSZero;
    KnownUndef = LHSUndef & RHSUndef;
    break;
  }
  case X86ISD::VZEXT_MOVL: {
    // If upper demanded elements are already zero then we have nothing to do.
    SDValue Src = Op.getOperand(0);
    APInt DemandedUpperElts = DemandedElts;
    DemandedUpperElts.clearLowBits(1);
    if (TLO.DAG.computeKnownBits(Src, DemandedUpperElts, Depth + 1).isZero())
      return TLO.CombineTo(Op, Src);
    break;
  }
  case X86ISD::VBROADCAST: {
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    if (!SrcVT.isVector())
      break;
    // Don't bother broadcasting if we just need the 0'th element.
    if (DemandedElts == 1) {
      if (Src.getValueType() != VT)
        Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
                             SDLoc(Op));
      return TLO.CombineTo(Op, Src);
    }
    // Only element 0 of the source is ever read.
    APInt SrcUndef, SrcZero;
    APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
    if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;

    // Aggressively peek through src to get at the demanded elt.
    // TODO - we should do this for all target/faux shuffles ops.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
            Src, SrcElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
    break;
  }
  // Variable shuffles: try to simplify the (possibly constant-pool) mask
  // operand, whose index differs per opcode.
  case X86ISD::VPERMV:
    if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 0, TLO,
                                                   Depth))
      return true;
    break;
  case X86ISD::PSHUFB:
  case X86ISD::VPERMV3:
  case X86ISD::VPERMILPV:
    if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 1, TLO,
                                                   Depth))
      return true;
    break;
  case X86ISD::VPPERM:
  case X86ISD::VPERMIL2:
    if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 2, TLO,
                                                   Depth))
      return true;
    break;
  }

  // For 256/512-bit ops that are 128/256-bit ops glued together, if we do not
  // demand any of the upper elements, then extract the lower subvector,
  // perform the narrower op and reinsert into an undef wide vector.
  if ((VT.is256BitVector() || VT.is512BitVector()) &&
      DemandedElts.lshr(NumElts / 2) == 0) {
    unsigned SizeInBits = VT.getSizeInBits();
    unsigned ExtSizeInBits = SizeInBits / 2;

    // See if 512-bit ops only use the bottom 128-bits.
    if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0)
      ExtSizeInBits = SizeInBits / 4;

    switch (Opc) {
      // Scalar broadcast.
    case X86ISD::VBROADCAST: {
      SDLoc DL(Op);
      SDValue Src = Op.getOperand(0);
      if (Src.getValueSizeInBits() > ExtSizeInBits)
        Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
      EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
                                    ExtSizeInBits / VT.getScalarSizeInBits());
      SDValue Bcst = TLO.DAG.getNode(X86ISD::VBROADCAST, DL, BcstVT, Src);
      return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
                                               TLO.DAG, DL, ExtSizeInBits));
    }
    case X86ISD::VBROADCAST_LOAD: {
      SDLoc DL(Op);
      auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
      EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
                                    ExtSizeInBits / VT.getScalarSizeInBits());
      SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
      SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
      SDValue Bcst = TLO.DAG.getMemIntrinsicNode(
          X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(),
          MemIntr->getMemOperand());
      // Preserve the original chain ordering for other users of the load.
      TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
                                           Bcst.getValue(1));
      return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
                                               TLO.DAG, DL, ExtSizeInBits));
    }
      // Subvector broadcast.
    case X86ISD::SUBV_BROADCAST_LOAD: {
      auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
      EVT MemVT = MemIntr->getMemoryVT();
      if (ExtSizeInBits == MemVT.getStoreSizeInBits()) {
        // Narrowed to a single copy of the subvector: plain load.
        SDLoc DL(Op);
        SDValue Ld =
            TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(),
                            MemIntr->getBasePtr(), MemIntr->getMemOperand());
        TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
                                             Ld.getValue(1));
        return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0,
                                                 TLO.DAG, DL, ExtSizeInBits));
      } else if ((ExtSizeInBits % MemVT.getStoreSizeInBits()) == 0) {
        // Still a whole number of copies: narrower subvector broadcast.
        SDLoc DL(Op);
        EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
                                      ExtSizeInBits / VT.getScalarSizeInBits());
        SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
        SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
        SDValue Bcst =
            TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys,
                                        Ops, MemVT, MemIntr->getMemOperand());
        TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
                                             Bcst.getValue(1));
        return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
                                                 TLO.DAG, DL, ExtSizeInBits));
      }
      break;
    }
      // Byte shifts by immediate.
    case X86ISD::VSHLDQ:
    case X86ISD::VSRLDQ:
      // Shift by uniform.
    case X86ISD::VSHL:
    case X86ISD::VSRL:
    case X86ISD::VSRA:
      // Shift by immediate.
    case X86ISD::VSHLI:
    case X86ISD::VSRLI:
    case X86ISD::VSRAI: {
      SDLoc DL(Op);
      SDValue Ext0 =
          extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
      SDValue ExtOp =
          TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1));
      SDValue UndefVec = TLO.DAG.getUNDEF(VT);
      SDValue Insert =
          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
      return TLO.CombineTo(Op, Insert);
    }
    case X86ISD::VPERMI: {
      // Simplify PERMPD/PERMQ to extract_subvector.
      // TODO: This should be done in shuffle combining.
      if (VT == MVT::v4f64 || VT == MVT::v4i64) {
        SmallVector<int, 4> Mask;
        DecodeVPERMMask(NumElts, Op.getConstantOperandVal(1), Mask);
        if (isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3)) {
          SDLoc DL(Op);
          SDValue Ext = extractSubVector(Op.getOperand(0), 2, TLO.DAG, DL, 128);
          SDValue UndefVec = TLO.DAG.getUNDEF(VT);
          SDValue Insert = insertSubVector(UndefVec, Ext, 0, TLO.DAG, DL, 128);
          return TLO.CombineTo(Op, Insert);
        }
      }
      break;
    }
    case X86ISD::VPERM2X128: {
      // Simplify VPERM2F128/VPERM2I128 to extract_subvector.
      SDLoc DL(Op);
      unsigned LoMask = Op.getConstantOperandVal(2) & 0xF;
      if (LoMask & 0x8)
        return TLO.CombineTo(
            Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, DL));
      unsigned EltIdx = (LoMask & 0x1) * (NumElts / 2);
      unsigned SrcIdx = (LoMask & 0x2) >> 1;
      SDValue ExtOp =
          extractSubVector(Op.getOperand(SrcIdx), EltIdx, TLO.DAG, DL, 128);
      SDValue UndefVec = TLO.DAG.getUNDEF(VT);
      SDValue Insert =
          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
      return TLO.CombineTo(Op, Insert);
    }
      // Zero upper elements.
    case X86ISD::VZEXT_MOVL:
      // Target unary shuffles by immediate:
    case X86ISD::PSHUFD:
    case X86ISD::PSHUFLW:
    case X86ISD::PSHUFHW:
    case X86ISD::VPERMILPI:
      // (Non-Lane Crossing) Target Shuffles.
    case X86ISD::VPERMILPV:
    case X86ISD::VPERMIL2:
    case X86ISD::PSHUFB:
    case X86ISD::UNPCKL:
    case X86ISD::UNPCKH:
    case X86ISD::BLENDI:
      // Integer ops.
    case X86ISD::AVG:
    case X86ISD::PACKSS:
    case X86ISD::PACKUS:
      // Horizontal Ops.
    case X86ISD::HADD:
    case X86ISD::HSUB:
    case X86ISD::FHADD:
    case X86ISD::FHSUB: {
      SDLoc DL(Op);
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
        SDValue SrcOp = Op.getOperand(i);
        EVT SrcVT = SrcOp.getValueType();
        assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) &&
               "Unsupported vector size");
        // Narrow each vector operand; scalar operands (e.g. shuffle imms)
        // pass through unchanged.
        Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL,
                                                          ExtSizeInBits)
                                       : SrcOp);
      }
      MVT ExtVT = VT.getSimpleVT();
      ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
                               ExtSizeInBits / ExtVT.getScalarSizeInBits());
      SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops);
      SDValue UndefVec = TLO.DAG.getUNDEF(VT);
      SDValue Insert =
          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
      return TLO.CombineTo(Op, Insert);
    }
    }
  }

  // Get target/faux shuffle mask.
  APInt OpUndef, OpZero;
  SmallVector<int, 64> OpMask;
  SmallVector<SDValue, 2> OpInputs;
  if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, OpUndef,
                              OpZero, TLO.DAG, Depth, false))
    return false;

  // Shuffle inputs must be the same size as the result.
  if (OpMask.size() != (unsigned)NumElts ||
      llvm::any_of(OpInputs, [VT](SDValue V) {
        return VT.getSizeInBits() != V.getValueSizeInBits() ||
               !V.getValueType().isVector();
      }))
    return false;

  KnownZero = OpZero;
  KnownUndef = OpUndef;

  // Check if shuffle mask can be simplified to undef/zero/identity.
  int NumSrcs = OpInputs.size();
  for (int i = 0; i != NumElts; ++i)
    if (!DemandedElts[i])
      OpMask[i] = SM_SentinelUndef;

  if (isUndefInRange(OpMask, 0, NumElts)) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }
  if (isUndefOrZeroInRange(OpMask, 0, NumElts)) {
    KnownZero.setAllBits();
    return TLO.CombineTo(
        Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
  }
  for (int Src = 0; Src != NumSrcs; ++Src)
    if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src]));

  // Attempt to simplify inputs.
  for (int Src = 0; Src != NumSrcs; ++Src) {
    // TODO: Support inputs of different types.
    if (OpInputs[Src].getValueType() != VT)
      continue;

    // Collect which elements of this input the demanded mask actually reads.
    int Lo = Src * NumElts;
    APInt SrcElts = APInt::getNullValue(NumElts);
    for (int i = 0; i != NumElts; ++i)
      if (DemandedElts[i]) {
        int M = OpMask[i] - Lo;
        if (0 <= M && M < NumElts)
          SrcElts.setBit(M);
      }

    // TODO - Propagate input undef/zero elts.
    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
                                   TLO, Depth + 1))
      return true;
  }

  // If we don't demand all elements, then attempt to combine to a simpler
  // shuffle.
  // We need to convert the depth to something combineX86ShufflesRecursively
  // can handle - so pretend its Depth == 0 again, and reduce the max depth
  // to match. This prevents combineX86ShuffleChain from returning a combined
  // shuffle that's the same as the original root shuffle.
  if (!DemandedElts.isAllOnesValue()) {
    assert(Depth < X86::MaxShuffleCombineDepth && "Depth out of range");

    SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef);
    for (int i = 0; i != NumElts; ++i)
      if (DemandedElts[i])
        DemandedMask[i] = i;

    SDValue NewShuffle = combineX86ShufflesRecursively(
        {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
        /*HasVariableMask*/ false,
        /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG,
        Subtarget);
    if (NewShuffle)
      return TLO.CombineTo(Op, NewShuffle);
  }

  return false;
}
39905 | |
39906 | bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( |
39907 | SDValue Op, const APInt &OriginalDemandedBits, |
39908 | const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, |
39909 | unsigned Depth) const { |
39910 | EVT VT = Op.getValueType(); |
39911 | unsigned BitWidth = OriginalDemandedBits.getBitWidth(); |
39912 | unsigned Opc = Op.getOpcode(); |
39913 | switch(Opc) { |
39914 | case X86ISD::VTRUNC: { |
39915 | KnownBits KnownOp; |
39916 | SDValue Src = Op.getOperand(0); |
39917 | MVT SrcVT = Src.getSimpleValueType(); |
39918 | |
39919 | |
39920 | APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits()); |
39921 | APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements()); |
39922 | if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1)) |
39923 | return true; |
39924 | break; |
39925 | } |
39926 | case X86ISD::PMULDQ: |
39927 | case X86ISD::PMULUDQ: { |
39928 | |
39929 | KnownBits KnownOp; |
39930 | SDValue LHS = Op.getOperand(0); |
39931 | SDValue RHS = Op.getOperand(1); |
39932 | |
39933 | APInt DemandedMask = APInt::getLowBitsSet(64, 32); |
39934 | if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp, |
39935 | TLO, Depth + 1)) |
39936 | return true; |
39937 | if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp, |
39938 | TLO, Depth + 1)) |
39939 | return true; |
39940 | |
39941 | |
39942 | SDValue DemandedLHS = SimplifyMultipleUseDemandedBits( |
39943 | LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); |
39944 | SDValue DemandedRHS = SimplifyMultipleUseDemandedBits( |
39945 | RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); |
39946 | if (DemandedLHS || DemandedRHS) { |
39947 | DemandedLHS = DemandedLHS ? DemandedLHS : LHS; |
39948 | DemandedRHS = DemandedRHS ? DemandedRHS : RHS; |
39949 | return TLO.CombineTo( |
39950 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS)); |
39951 | } |
39952 | break; |
39953 | } |
39954 | case X86ISD::VSHLI: { |
39955 | SDValue Op0 = Op.getOperand(0); |
39956 | |
39957 | unsigned ShAmt = Op.getConstantOperandVal(1); |
39958 | if (ShAmt >= BitWidth) |
39959 | break; |
39960 | |
39961 | APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt); |
39962 | |
39963 | |
39964 | |
39965 | |
39966 | if (Op0.getOpcode() == X86ISD::VSRLI && |
39967 | OriginalDemandedBits.countTrailingZeros() >= ShAmt) { |
39968 | unsigned Shift2Amt = Op0.getConstantOperandVal(1); |
39969 | if (Shift2Amt < BitWidth) { |
39970 | int Diff = ShAmt - Shift2Amt; |
39971 | if (Diff == 0) |
39972 | return TLO.CombineTo(Op, Op0.getOperand(0)); |
39973 | |
39974 | unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI; |
39975 | SDValue NewShift = TLO.DAG.getNode( |
39976 | NewOpc, SDLoc(Op), VT, Op0.getOperand(0), |
39977 | TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); |
39978 | return TLO.CombineTo(Op, NewShift); |
39979 | } |
39980 | } |
39981 | |
39982 | |
39983 | unsigned NumSignBits = |
39984 | TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1); |
39985 | unsigned UpperDemandedBits = |
39986 | BitWidth - OriginalDemandedBits.countTrailingZeros(); |
39987 | if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits) |
39988 | return TLO.CombineTo(Op, Op0); |
39989 | |
39990 | if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, |
39991 | TLO, Depth + 1)) |
39992 | return true; |
39993 | |
39994 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
39995 | Known.Zero <<= ShAmt; |
39996 | Known.One <<= ShAmt; |
39997 | |
39998 | |
39999 | Known.Zero.setLowBits(ShAmt); |
40000 | return false; |
40001 | } |
40002 | case X86ISD::VSRLI: { |
40003 | unsigned ShAmt = Op.getConstantOperandVal(1); |
40004 | if (ShAmt >= BitWidth) |
40005 | break; |
40006 | |
40007 | APInt DemandedMask = OriginalDemandedBits << ShAmt; |
40008 | |
40009 | if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, |
40010 | OriginalDemandedElts, Known, TLO, Depth + 1)) |
40011 | return true; |
40012 | |
40013 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
40014 | Known.Zero.lshrInPlace(ShAmt); |
40015 | Known.One.lshrInPlace(ShAmt); |
40016 | |
40017 | |
40018 | Known.Zero.setHighBits(ShAmt); |
40019 | return false; |
40020 | } |
40021 | case X86ISD::VSRAI: { |
40022 | SDValue Op0 = Op.getOperand(0); |
40023 | SDValue Op1 = Op.getOperand(1); |
40024 | |
40025 | unsigned ShAmt = cast<ConstantSDNode>(Op1)->getZExtValue(); |
40026 | if (ShAmt >= BitWidth) |
40027 | break; |
40028 | |
40029 | APInt DemandedMask = OriginalDemandedBits << ShAmt; |
40030 | |
40031 | |
40032 | if (OriginalDemandedBits.isSignMask()) |
40033 | return TLO.CombineTo(Op, Op0); |
40034 | |
40035 | |
40036 | if (Op0.getOpcode() == X86ISD::VSHLI && |
40037 | Op.getOperand(1) == Op0.getOperand(1)) { |
40038 | SDValue Op00 = Op0.getOperand(0); |
40039 | unsigned NumSignBits = |
40040 | TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts); |
40041 | if (ShAmt < NumSignBits) |
40042 | return TLO.CombineTo(Op, Op00); |
40043 | } |
40044 | |
40045 | |
40046 | |
40047 | if (OriginalDemandedBits.countLeadingZeros() < ShAmt) |
40048 | DemandedMask.setSignBit(); |
40049 | |
40050 | if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, |
40051 | TLO, Depth + 1)) |
40052 | return true; |
40053 | |
40054 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
40055 | Known.Zero.lshrInPlace(ShAmt); |
40056 | Known.One.lshrInPlace(ShAmt); |
40057 | |
40058 | |
40059 | |
40060 | if (Known.Zero[BitWidth - ShAmt - 1] || |
40061 | OriginalDemandedBits.countLeadingZeros() >= ShAmt) |
40062 | return TLO.CombineTo( |
40063 | Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1)); |
40064 | |
40065 | |
40066 | if (Known.One[BitWidth - ShAmt - 1]) |
40067 | Known.One.setHighBits(ShAmt); |
40068 | return false; |
40069 | } |
40070 | case X86ISD::PEXTRB: |
40071 | case X86ISD::PEXTRW: { |
40072 | SDValue Vec = Op.getOperand(0); |
40073 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
40074 | MVT VecVT = Vec.getSimpleValueType(); |
40075 | unsigned NumVecElts = VecVT.getVectorNumElements(); |
40076 | |
40077 | if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) { |
40078 | unsigned Idx = CIdx->getZExtValue(); |
40079 | unsigned VecBitWidth = VecVT.getScalarSizeInBits(); |
40080 | |
40081 | |
40082 | |
40083 | APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth); |
40084 | if (DemandedVecBits == 0) |
40085 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
40086 | |
40087 | APInt KnownUndef, KnownZero; |
40088 | APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx); |
40089 | if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef, |
40090 | KnownZero, TLO, Depth + 1)) |
40091 | return true; |
40092 | |
40093 | KnownBits KnownVec; |
40094 | if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, |
40095 | KnownVec, TLO, Depth + 1)) |
40096 | return true; |
40097 | |
40098 | if (SDValue V = SimplifyMultipleUseDemandedBits( |
40099 | Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1)) |
40100 | return TLO.CombineTo( |
40101 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1))); |
40102 | |
40103 | Known = KnownVec.zext(BitWidth); |
40104 | return false; |
40105 | } |
40106 | break; |
40107 | } |
40108 | case X86ISD::PINSRB: |
40109 | case X86ISD::PINSRW: { |
40110 | SDValue Vec = Op.getOperand(0); |
40111 | SDValue Scl = Op.getOperand(1); |
40112 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
40113 | MVT VecVT = Vec.getSimpleValueType(); |
40114 | |
40115 | if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) { |
40116 | unsigned Idx = CIdx->getZExtValue(); |
40117 | if (!OriginalDemandedElts[Idx]) |
40118 | return TLO.CombineTo(Op, Vec); |
40119 | |
40120 | KnownBits KnownVec; |
40121 | APInt DemandedVecElts(OriginalDemandedElts); |
40122 | DemandedVecElts.clearBit(Idx); |
40123 | if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts, |
40124 | KnownVec, TLO, Depth + 1)) |
40125 | return true; |
40126 | |
40127 | KnownBits KnownScl; |
40128 | unsigned NumSclBits = Scl.getScalarValueSizeInBits(); |
40129 | APInt DemandedSclBits = OriginalDemandedBits.zext(NumSclBits); |
40130 | if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) |
40131 | return true; |
40132 | |
40133 | KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits()); |
40134 | Known = KnownBits::commonBits(KnownVec, KnownScl); |
40135 | return false; |
40136 | } |
40137 | break; |
40138 | } |
40139 | case X86ISD::PACKSS: |
40140 | |
40141 | |
40142 | |
40143 | if (OriginalDemandedBits.isSignMask()) { |
40144 | APInt DemandedLHS, DemandedRHS; |
40145 | getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS); |
40146 | |
40147 | KnownBits KnownLHS, KnownRHS; |
40148 | APInt SignMask = APInt::getSignMask(BitWidth * 2); |
40149 | if (SimplifyDemandedBits(Op.getOperand(0), SignMask, DemandedLHS, |
40150 | KnownLHS, TLO, Depth + 1)) |
40151 | return true; |
40152 | if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS, |
40153 | KnownRHS, TLO, Depth + 1)) |
40154 | return true; |
40155 | |
40156 | |
40157 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
40158 | Op.getOperand(0), SignMask, DemandedLHS, TLO.DAG, Depth + 1); |
40159 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
40160 | Op.getOperand(1), SignMask, DemandedRHS, TLO.DAG, Depth + 1); |
40161 | if (DemandedOp0 || DemandedOp1) { |
40162 | SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0); |
40163 | SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1); |
40164 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1)); |
40165 | } |
40166 | } |
40167 | |
40168 | break; |
40169 | case X86ISD::VBROADCAST: { |
40170 | SDValue Src = Op.getOperand(0); |
40171 | MVT SrcVT = Src.getSimpleValueType(); |
40172 | APInt DemandedElts = APInt::getOneBitSet( |
40173 | SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1, 0); |
40174 | if (SimplifyDemandedBits(Src, OriginalDemandedBits, DemandedElts, Known, |
40175 | TLO, Depth + 1)) |
40176 | return true; |
40177 | |
40178 | |
40179 | |
40180 | if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() && |
40181 | OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2)) { |
40182 | MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2); |
40183 | SDValue NewSrc = |
40184 | TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src); |
40185 | MVT NewVT = MVT::getVectorVT(NewSrcVT, VT.getVectorNumElements() * 2); |
40186 | SDValue NewBcst = |
40187 | TLO.DAG.getNode(X86ISD::VBROADCAST, SDLoc(Op), NewVT, NewSrc); |
40188 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, NewBcst)); |
40189 | } |
40190 | break; |
40191 | } |
40192 | case X86ISD::PCMPGT: |
40193 | |
40194 | |
40195 | if (OriginalDemandedBits.isSignMask() && |
40196 | ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) |
40197 | return TLO.CombineTo(Op, Op.getOperand(1)); |
40198 | break; |
40199 | case X86ISD::MOVMSK: { |
40200 | SDValue Src = Op.getOperand(0); |
40201 | MVT SrcVT = Src.getSimpleValueType(); |
40202 | unsigned SrcBits = SrcVT.getScalarSizeInBits(); |
40203 | unsigned NumElts = SrcVT.getVectorNumElements(); |
40204 | |
40205 | |
40206 | if (OriginalDemandedBits.countTrailingZeros() >= NumElts) |
40207 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
40208 | |
40209 | |
40210 | APInt KnownUndef, KnownZero; |
40211 | APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts); |
40212 | if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, |
40213 | TLO, Depth + 1)) |
40214 | return true; |
40215 | |
40216 | Known.Zero = KnownZero.zextOrSelf(BitWidth); |
40217 | Known.Zero.setHighBits(BitWidth - NumElts); |
40218 | |
40219 | |
40220 | KnownBits KnownSrc; |
40221 | APInt DemandedSrcBits = APInt::getSignMask(SrcBits); |
40222 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO, |
40223 | Depth + 1)) |
40224 | return true; |
40225 | |
40226 | if (KnownSrc.One[SrcBits - 1]) |
40227 | Known.One.setLowBits(NumElts); |
40228 | else if (KnownSrc.Zero[SrcBits - 1]) |
40229 | Known.Zero.setLowBits(NumElts); |
40230 | |
40231 | |
40232 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
40233 | Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1)) |
40234 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); |
40235 | return false; |
40236 | } |
40237 | case X86ISD::BEXTR: |
40238 | case X86ISD::BEXTRI: { |
40239 | SDValue Op0 = Op.getOperand(0); |
40240 | SDValue Op1 = Op.getOperand(1); |
40241 | |
40242 | |
40243 | if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) { |
40244 | |
40245 | uint64_t Val1 = Cst1->getZExtValue(); |
40246 | uint64_t MaskedVal1 = Val1 & 0xFFFF; |
40247 | if (Opc == X86ISD::BEXTR && MaskedVal1 != Val1) { |
40248 | SDLoc DL(Op); |
40249 | return TLO.CombineTo( |
40250 | Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0, |
40251 | TLO.DAG.getConstant(MaskedVal1, DL, VT))); |
40252 | } |
40253 | |
40254 | unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0); |
40255 | unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8); |
40256 | |
40257 | |
40258 | if (Length == 0) { |
40259 | Known.setAllZero(); |
40260 | return false; |
40261 | } |
40262 | |
40263 | if ((Shift + Length) <= BitWidth) { |
40264 | APInt DemandedMask = APInt::getBitsSet(BitWidth, Shift, Shift + Length); |
40265 | if (SimplifyDemandedBits(Op0, DemandedMask, Known, TLO, Depth + 1)) |
40266 | return true; |
40267 | |
40268 | Known = Known.extractBits(Length, Shift); |
40269 | Known = Known.zextOrTrunc(BitWidth); |
40270 | return false; |
40271 | } |
40272 | } else { |
40273 | assert(Opc == X86ISD::BEXTR && "Unexpected opcode!"); |
40274 | KnownBits Known1; |
40275 | APInt DemandedMask(APInt::getLowBitsSet(BitWidth, 16)); |
40276 | if (SimplifyDemandedBits(Op1, DemandedMask, Known1, TLO, Depth + 1)) |
40277 | return true; |
40278 | |
40279 | |
40280 | KnownBits LengthBits = Known1.extractBits(8, 8); |
40281 | if (LengthBits.isZero()) |
40282 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
40283 | } |
40284 | |
40285 | break; |
40286 | } |
40287 | case X86ISD::PDEP: { |
40288 | SDValue Op0 = Op.getOperand(0); |
40289 | SDValue Op1 = Op.getOperand(1); |
40290 | |
40291 | unsigned DemandedBitsLZ = OriginalDemandedBits.countLeadingZeros(); |
40292 | APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); |
40293 | |
40294 | |
40295 | |
40296 | if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1)) |
40297 | return true; |
40298 | |
40299 | |
40300 | |
40301 | |
40302 | KnownBits Known2; |
40303 | uint64_t Count = (~Known.Zero & LoMask).countPopulation(); |
40304 | APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count)); |
40305 | if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1)) |
40306 | return true; |
40307 | |
40308 | |
40309 | Known.One.clearAllBits(); |
40310 | |
40311 | |
40312 | Known.Zero.setLowBits(Known2.countMinTrailingZeros()); |
40313 | return false; |
40314 | } |
40315 | } |
40316 | |
40317 | return TargetLowering::SimplifyDemandedBitsForTargetNode( |
40318 | Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); |
40319 | } |
40320 | |
// Target hook: given the bits/elements actually demanded of Op, try to return
// an already-existing SDValue that produces the same demanded bits, without
// creating any new nodes (safe even when Op has multiple uses).
SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  int NumElts = DemandedElts.getBitWidth();
  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();

  switch (Opc) {
  case X86ISD::PINSRB:
  case X86ISD::PINSRW: {
    // If we don't demand the inserted element, bypass the insertion and use
    // the base vector directly.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    MVT VecVT = Vec.getSimpleValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case X86ISD::VSHLI: {
    // If the source has enough sign bits that, even after shifting left by
    // ShAmt, every demanded upper bit is still a sign-bit copy, then the
    // shift doesn't change the demanded bits - use the source directly.
    // (Mirrors the VSHLI case in SimplifyDemandedBitsForTargetNode.)
    SDValue Op0 = Op.getOperand(0);
    unsigned ShAmt = Op.getConstantOperandVal(1);
    unsigned BitWidth = DemandedBits.getBitWidth();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
      return Op0;
    break;
  }
  case X86ISD::VSRAI:
    // An arithmetic right shift never changes the sign bit itself, so if only
    // the sign bit is demanded the shift is a no-op.
    if (DemandedBits.isSignMask())
      return Op.getOperand(0);
    break;
  case X86ISD::PCMPGT:
    // PCMPGT(0, X): when only the sign bit of the result is demanded, it
    // matches the sign bit of X, so X itself suffices.
    if (DemandedBits.isSignMask() &&
        ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
      return Op.getOperand(1);
    break;
  }

  // Resolve Op as a target shuffle and check whether, restricted to the
  // demanded elements, it degenerates to undef, zero, or an identity of one
  // of its inputs.
  APInt ShuffleUndef, ShuffleZero;
  SmallVector<int, 16> ShuffleMask;
  SmallVector<SDValue, 2> ShuffleOps;
  if (getTargetShuffleInputs(Op, DemandedElts, ShuffleOps, ShuffleMask,
                             ShuffleUndef, ShuffleZero, DAG, Depth, false)) {
    // Only handle shuffles whose inputs are the same width as the result
    // (no widening/narrowing through the shuffle).
    int NumOps = ShuffleOps.size();
    if (ShuffleMask.size() == (unsigned)NumElts &&
        llvm::all_of(ShuffleOps, [VT](SDValue V) {
          return VT.getSizeInBits() == V.getValueSizeInBits();
        })) {
      // All demanded elements undef (or undef/zero)? Fold to undef / zero.
      if (DemandedElts.isSubsetOf(ShuffleUndef))
        return DAG.getUNDEF(VT);
      if (DemandedElts.isSubsetOf(ShuffleUndef | ShuffleZero))
        return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op));

      // Bitmask of shuffle inputs that could still be a demanded-lane
      // identity; cleared once an element maps to a different lane, and
      // intersected down to the single op each element comes from.
      APInt IdentityOp = APInt::getAllOnesValue(NumOps);
      for (int i = 0; i != NumElts; ++i) {
        int M = ShuffleMask[i];
        if (!DemandedElts[i] || ShuffleUndef[i])
          continue;
        int OpIdx = M / NumElts;
        int EltIdx = M % NumElts;
        if (M < 0 || EltIdx != i) {
          IdentityOp.clearAllBits();
          break;
        }
        IdentityOp &= APInt::getOneBitSet(NumOps, OpIdx);
        if (IdentityOp == 0)
          break;
      }
      assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) &&
             "Multiple identity shuffles detected");

      // Exactly one input survived as an identity - forward it (bitcast to
      // the result type, since inputs may differ only in element type).
      if (IdentityOp != 0)
        return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]);
    }
  }

  return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
      Op, DemandedBits, DemandedElts, DAG, Depth);
}
40412 | |
40413 | |
40414 | |
40415 | static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, |
40416 | bool AllowTruncate) { |
40417 | switch (Src.getOpcode()) { |
40418 | case ISD::TRUNCATE: |
40419 | if (!AllowTruncate) |
40420 | return false; |
40421 | LLVM_FALLTHROUGH; |
40422 | case ISD::SETCC: |
40423 | return Src.getOperand(0).getValueSizeInBits() == Size; |
40424 | case ISD::AND: |
40425 | case ISD::XOR: |
40426 | case ISD::OR: |
40427 | return checkBitcastSrcVectorSize(Src.getOperand(0), Size, AllowTruncate) && |
40428 | checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate); |
40429 | } |
40430 | return false; |
40431 | } |
40432 | |
40433 | |
40434 | static unsigned getAltBitOpcode(unsigned Opcode) { |
40435 | switch(Opcode) { |
40436 | case ISD::AND: return X86ISD::FAND; |
40437 | case ISD::OR: return X86ISD::FOR; |
40438 | case ISD::XOR: return X86ISD::FXOR; |
40439 | case X86ISD::ANDNP: return X86ISD::FANDN; |
40440 | } |
40441 | llvm_unreachable("Unknown bitwise opcode"); |
40442 | } |
40443 | |
40444 | |
// SSE1-only helper: rewrite a v4i1 source (sign-bit compares combined with
// and/or/xor) into an equivalent v4f32 value whose element sign bits carry
// the i1 results, so the caller can feed it to MOVMSK(v4f32).
// Returns SDValue() if the pattern doesn't match.
static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src,
                                          const SDLoc &DL) {
  EVT SrcVT = Src.getValueType();
  if (SrcVT != MVT::v4i1)
    return SDValue();

  switch (Src.getOpcode()) {
  case ISD::SETCC:
    // setlt (v4i32 X), 0 --> the i1 results are exactly the sign bits of X,
    // so reinterpreting X's bits as v4f32 preserves them.
    if (Src.getOperand(0).getValueType() == MVT::v4i32 &&
        ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
        cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) {
      SDValue Op0 = Src.getOperand(0);
      // A plain load can simply be re-typed as a float load.
      if (ISD::isNormalLoad(Op0.getNode()))
        return DAG.getBitcast(MVT::v4f32, Op0);
      // Peek through an existing v4f32 -> v4i32 bitcast.
      if (Op0.getOpcode() == ISD::BITCAST &&
          Op0.getOperand(0).getValueType() == MVT::v4f32)
        return Op0.getOperand(0);
    }
    break;
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR: {
    // Recurse into both operands; only fold if both sides convert, then
    // combine them with the FP-domain form of the same logic op.
    SDValue Op0 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(0), DL);
    SDValue Op1 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(1), DL);
    if (Op0 && Op1)
      return DAG.getNode(getAltBitOpcode(Src.getOpcode()), DL, MVT::v4f32, Op0,
                         Op1);
    break;
  }
  }
  return SDValue();
}
40477 | |
40478 | |
40479 | static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, |
40480 | SDValue Src, const SDLoc &DL) { |
40481 | switch (Src.getOpcode()) { |
40482 | case ISD::SETCC: |
40483 | case ISD::TRUNCATE: |
40484 | return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); |
40485 | case ISD::AND: |
40486 | case ISD::XOR: |
40487 | case ISD::OR: |
40488 | return DAG.getNode( |
40489 | Src.getOpcode(), DL, SExtVT, |
40490 | signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL), |
40491 | signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL)); |
40492 | } |
40493 | llvm_unreachable("Unexpected node type for vXi1 sign extension"); |
40494 | } |
40495 | |
40496 | |
40497 | |
40498 | |
40499 | |
40500 | |
40501 | |
40502 | static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, |
40503 | const SDLoc &DL, |
40504 | const X86Subtarget &Subtarget) { |
40505 | EVT SrcVT = Src.getValueType(); |
40506 | if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1) |
40507 | return SDValue(); |
40508 | |
40509 | |
40510 | |
40511 | if (Subtarget.hasSSE1() && !Subtarget.hasSSE2()) { |
40512 | if (SDValue V = adjustBitcastSrcVectorSSE1(DAG, Src, DL)) { |
40513 | V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, |
40514 | DAG.getBitcast(MVT::v4f32, V)); |
40515 | return DAG.getZExtOrTrunc(V, DL, VT); |
40516 | } |
40517 | } |
40518 | |
40519 | |
40520 | |
40521 | |
40522 | |
40523 | bool PreferMovMsk = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() && |
40524 | (Src.getOperand(0).getValueType() == MVT::v16i8 || |
40525 | Src.getOperand(0).getValueType() == MVT::v32i8 || |
40526 | Src.getOperand(0).getValueType() == MVT::v64i8); |
40527 | |
40528 | |
40529 | |
40530 | if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() && |
40531 | cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT && |
40532 | ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) { |
40533 | EVT CmpVT = Src.getOperand(0).getValueType(); |
40534 | EVT EltVT = CmpVT.getVectorElementType(); |
40535 | if (CmpVT.getSizeInBits() <= 256 && |
40536 | (EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64)) |
40537 | PreferMovMsk = true; |
40538 | } |
40539 | |
40540 | |
40541 | |
40542 | if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk)) |
40543 | return SDValue(); |
40544 | |
40545 | |
40546 | |
40547 | |
40548 | |
40549 | |
40550 | |
40551 | |
40552 | |
40553 | |
40554 | |
40555 | MVT SExtVT; |
40556 | bool PropagateSExt = false; |
40557 | switch (SrcVT.getSimpleVT().SimpleTy) { |
40558 | default: |
40559 | return SDValue(); |
40560 | case MVT::v2i1: |
40561 | SExtVT = MVT::v2i64; |
40562 | break; |
40563 | case MVT::v4i1: |
40564 | SExtVT = MVT::v4i32; |
40565 | |
40566 | |
40567 | if (Subtarget.hasAVX() && |
40568 | checkBitcastSrcVectorSize(Src, 256, Subtarget.hasAVX2())) { |
40569 | SExtVT = MVT::v4i64; |
40570 | PropagateSExt = true; |
40571 | } |
40572 | break; |
40573 | case MVT::v8i1: |
40574 | SExtVT = MVT::v8i16; |
40575 | |
40576 | |
40577 | |
40578 | |
40579 | |
40580 | if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256, true) || |
40581 | checkBitcastSrcVectorSize(Src, 512, true))) { |
40582 | SExtVT = MVT::v8i32; |
40583 | PropagateSExt = true; |
40584 | } |
40585 | break; |
40586 | case MVT::v16i1: |
40587 | SExtVT = MVT::v16i8; |
40588 | |
40589 | |
40590 | |
40591 | |
40592 | break; |
40593 | case MVT::v32i1: |
40594 | SExtVT = MVT::v32i8; |
40595 | break; |
40596 | case MVT::v64i1: |
40597 | |
40598 | |
40599 | if (Subtarget.hasAVX512()) { |
40600 | if (Subtarget.hasBWI()) |
40601 | return SDValue(); |
40602 | SExtVT = MVT::v64i8; |
40603 | break; |
40604 | } |
40605 | |
40606 | if (checkBitcastSrcVectorSize(Src, 512, false)) { |
40607 | SExtVT = MVT::v64i8; |
40608 | break; |
40609 | } |
40610 | return SDValue(); |
40611 | }; |
40612 | |
40613 | SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL) |
40614 | : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); |
40615 | |
40616 | if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) { |
40617 | V = getPMOVMSKB(DL, V, DAG, Subtarget); |
40618 | } else { |
40619 | if (SExtVT == MVT::v8i16) |
40620 | V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V, |
40621 | DAG.getUNDEF(MVT::v8i16)); |
40622 | V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); |
40623 | } |
40624 | |
40625 | EVT IntVT = |
40626 | EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements()); |
40627 | V = DAG.getZExtOrTrunc(V, DL, IntVT); |
40628 | return DAG.getBitcast(VT, V); |
40629 | } |
40630 | |
40631 | |
40632 | static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { |
40633 | EVT SrcVT = Op.getValueType(); |
40634 | assert(SrcVT.getVectorElementType() == MVT::i1 && |
40635 | "Expected a vXi1 vector"); |
40636 | assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && |
40637 | "Expected a constant build vector"); |
40638 | |
40639 | APInt Imm(SrcVT.getVectorNumElements(), 0); |
40640 | for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) { |
40641 | SDValue In = Op.getOperand(Idx); |
40642 | if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1)) |
40643 | Imm.setBit(Idx); |
40644 | } |
40645 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth()); |
40646 | return DAG.getConstant(Imm, SDLoc(Op), IntVT); |
40647 | } |
40648 | |
// For a bitcast of a single-use AND/OR/XOR between the AVX512 mask domain
// (vXi1) and scalar integers, push the bitcast through the logic op so the
// op executes directly in the destination domain. This removes a cross-domain
// cast when one operand is already a bitcast from the destination type, or
// when the RHS is a constant vXi1 vector we can rematerialize as an integer.
static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast");

  if (!DCI.isBeforeLegalizeOps())
    return SDValue();

  // Only useful when vXi1 mask types exist (AVX512).
  if (!Subtarget.hasAVX512())
    return SDValue();

  EVT DstVT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SrcVT = Op.getValueType();

  if (!Op.hasOneUse())
    return SDValue();

  // Only fold through bitwise logic ops.
  if (Op.getOpcode() != ISD::AND &&
      Op.getOpcode() != ISD::OR &&
      Op.getOpcode() != ISD::XOR)
    return SDValue();

  // Restrict to vXi1 <-> scalar-integer casts (either direction).
  if (!(SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
        DstVT.isScalarInteger()) &&
      !(DstVT.isVector() && DstVT.getVectorElementType() == MVT::i1 &&
        SrcVT.isScalarInteger()))
    return SDValue();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  // bitcast (logic (bitcast X), Y) --> logic X, (bitcast Y)
  if (LHS.hasOneUse() && LHS.getOpcode() == ISD::BITCAST &&
      LHS.getOperand(0).getValueType() == DstVT)
    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, LHS.getOperand(0),
                       DAG.getBitcast(DstVT, RHS));

  // bitcast (logic X, (bitcast Y)) --> logic (bitcast X), Y
  if (RHS.hasOneUse() && RHS.getOpcode() == ISD::BITCAST &&
      RHS.getOperand(0).getValueType() == DstVT)
    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
                       DAG.getBitcast(DstVT, LHS), RHS.getOperand(0));

  // Constant vXi1 RHS: rebuild it directly as an integer constant so the
  // logic op can move wholesale into the scalar domain.
  if (ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) {
    RHS = combinevXi1ConstantToInteger(RHS, DAG);
    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
                       DAG.getBitcast(DstVT, LHS), RHS);
  }

  return SDValue();
}
40704 | |
// Lower a BUILD_VECTOR destined to be used as an x86mmx value: build each
// element as an MMX scalar, then zip neighbours together with MMX punpckl
// intrinsics, with a splat fast-path using SSE1 pshufw when available.
static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  SDLoc DL(BV);
  unsigned NumElts = BV->getNumOperands();
  SDValue Splat = BV->getSplatValue();

  // Build an MMX element from a scalar build_vector operand.
  auto CreateMMXElement = [&](SDValue V) {
    if (V.isUndef())
      return DAG.getUNDEF(MVT::x86mmx);
    if (V.getValueType().isFloatingPoint()) {
      if (Subtarget.hasSSE1() && !isa<ConstantFPSDNode>(V)) {
        // Keep non-constant FP values in the vector domain:
        // scalar -> v4f32 -> v2i64 -> mmx via MOVDQ2Q.
        V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, V);
        V = DAG.getBitcast(MVT::v2i64, V);
        return DAG.getNode(X86ISD::MOVDQ2Q, DL, MVT::x86mmx, V);
      }
      // Otherwise reinterpret the FP bits as i32 and go through a GPR.
      V = DAG.getBitcast(MVT::i32, V);
    } else {
      V = DAG.getAnyExtOrTrunc(V, DL, MVT::i32);
    }
    // Move the (low 32 bits of the) scalar into an MMX register.
    return DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, V);
  };

  // Per-element MMX values collected for the merge tree below.
  SmallVector<SDValue, 8> Ops;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Splat fast-path: materialize the element once and broadcast it.
  if (Splat) {
    if (Splat.isUndef())
      return DAG.getUNDEF(MVT::x86mmx);

    Splat = CreateMMXElement(Splat);

    if (Subtarget.hasSSE1()) {
      // For 8 x i8, first unpack the byte with itself to form a 16-bit unit
      // that pshufw can replicate.
      if (NumElts == 8)
        Splat = DAG.getNode(
            ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
            DAG.getTargetConstant(Intrinsic::x86_mmx_punpcklbw, DL,
                                  TLI.getPointerTy(DAG.getDataLayout())),
            Splat, Splat);

      // pshufw mask 0 replicates word 0 to every word; 0x44 (= 01,00,01,00)
      // replicates the low dword for the 2-element case.
      unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
      return DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
          DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL,
                                TLI.getPointerTy(DAG.getDataLayout())),
          Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8));
    }
    // No SSE1: fall through to the generic unpack tree with copies.
    Ops.append(NumElts, Splat);
  } else {
    for (unsigned i = 0; i != NumElts; ++i)
      Ops.push_back(CreateMMXElement(BV->getOperand(i)));
  }

  // Pairwise interleave neighbouring elements until one value remains,
  // choosing the unpack width from the number of values left (2 -> dword,
  // 4 -> word, otherwise byte).
  while (Ops.size() > 1) {
    unsigned NumOps = Ops.size();
    unsigned IntrinOp =
        (NumOps == 2 ? Intrinsic::x86_mmx_punpckldq
                     : (NumOps == 4 ? Intrinsic::x86_mmx_punpcklwd
                                    : Intrinsic::x86_mmx_punpcklbw));
    SDValue Intrin = DAG.getTargetConstant(
        IntrinOp, DL, TLI.getPointerTy(DAG.getDataLayout()));
    for (unsigned i = 0; i != NumOps; i += 2)
      Ops[i / 2] = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, Intrin,
                               Ops[i], Ops[i + 1]);
    Ops.resize(NumOps / 2);
  }

  return Ops[0];
}
40780 | |
40781 | |
40782 | |
40783 | |
40784 | |
// Try to turn a scalar integer V, which is being bitcast to the vXi1 mask
// type VT, directly into mask-register operations by recursively looking
// through bitcasts, truncates, extensions, OR and constant SHL.
// Returns SDValue() if no profitable conversion is found.
static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL,
                                          SelectionDAG &DAG,
                                          const X86Subtarget &Subtarget) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned Opc = V.getOpcode();
  switch (Opc) {
  case ISD::BITCAST: {
    // bitcast(bitcast(X)) -> bitcast(X) when X is a vector or FP value.
    SDValue Src = V.getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (SrcVT.isVector() || SrcVT.isFloatingPoint())
      return DAG.getBitcast(VT, Src);
    break;
  }
  case ISD::TRUNCATE: {
    // A scalar truncate becomes extraction of the low subvector of the
    // wider mask, if that wider mask type is legal.
    SDValue Src = V.getOperand(0);
    EVT NewSrcVT =
        EVT::getVectorVT(*DAG.getContext(), MVT::i1, Src.getValueSizeInBits());
    if (TLI.isTypeLegal(NewSrcVT))
      if (SDValue N0 =
              combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N0,
                           DAG.getIntPtrConstant(0, DL));
    break;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND: {
    // A scalar extension becomes insertion of the narrow mask into the low
    // lanes of an undef (anyext) or all-zero (zext) wide mask.
    SDValue Src = V.getOperand(0);
    EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                    Src.getScalarValueSizeInBits());
    if (TLI.isTypeLegal(NewSrcVT))
      if (SDValue N0 =
              combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                           Opc == ISD::ANY_EXTEND ? DAG.getUNDEF(VT)
                                                  : DAG.getConstant(0, DL, VT),
                           N0, DAG.getIntPtrConstant(0, DL));
    break;
  }
  case ISD::OR: {
    // OR folds element-wise, but only if both operands convert.
    SDValue Src0 = V.getOperand(0);
    SDValue Src1 = V.getOperand(1);
    if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
      if (SDValue N1 = combineBitcastToBoolVector(VT, Src1, DL, DAG, Subtarget))
        return DAG.getNode(Opc, DL, VT, N0, N1);
    break;
  }
  case ISD::SHL: {
    // A constant left shift becomes KSHIFTL - but only where the subtarget
    // has the corresponding mask-shift support (DQI for v8i1, BWI for
    // v32i1/v64i1).
    SDValue Src0 = V.getOperand(0);
    if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) ||
        ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI()))
      break;

    if (auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1)))
      if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
        return DAG.getNode(
            X86ISD::KSHIFTL, DL, VT, N0,
            DAG.getTargetConstant(Amt->getZExtValue(), DL, MVT::i8));
    break;
  }
  }
  return SDValue();
}
40852 | |
40853 | static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, |
40854 | TargetLowering::DAGCombinerInfo &DCI, |
40855 | const X86Subtarget &Subtarget) { |
40856 | SDValue N0 = N->getOperand(0); |
40857 | EVT VT = N->getValueType(0); |
40858 | EVT SrcVT = N0.getValueType(); |
40859 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
40860 | |
40861 | |
40862 | |
40863 | |
40864 | |
40865 | |
40866 | |
40867 | if (DCI.isBeforeLegalize()) { |
40868 | SDLoc dl(N); |
40869 | if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget)) |
40870 | return V; |
40871 | |
40872 | |
40873 | |
40874 | if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && |
40875 | Subtarget.hasAVX512()) { |
40876 | N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0); |
40877 | N0 = DAG.getBitcast(MVT::v8i1, N0); |
40878 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0, |
40879 | DAG.getIntPtrConstant(0, dl)); |
40880 | } |
40881 | |
40882 | |
40883 | |
40884 | if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && |
40885 | Subtarget.hasAVX512()) { |
40886 | |
40887 | |
40888 | |
40889 | |
40890 | |
40891 | |
40892 | |
40893 | if (N0.getOpcode() == ISD::CONCAT_VECTORS) { |
40894 | SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1); |
40895 | if (ISD::isBuildVectorAllZeros(LastOp.getNode())) { |
40896 | SrcVT = LastOp.getValueType(); |
40897 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
40898 | SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end()); |
40899 | Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT)); |
40900 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); |
40901 | N0 = DAG.getBitcast(MVT::i8, N0); |
40902 | return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); |
40903 | } |
40904 | } |
40905 | |
40906 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
40907 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); |
40908 | Ops[0] = N0; |
40909 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); |
40910 | N0 = DAG.getBitcast(MVT::i8, N0); |
40911 | return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); |
40912 | } |
40913 | } else { |
40914 | |
40915 | |
40916 | if (VT.isVector() && VT.getScalarType() == MVT::i1 && |
40917 | SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) { |
40918 | if (SDValue V = |
40919 | combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget)) |
40920 | return V; |
40921 | } |
40922 | } |
40923 | |
40924 | |
40925 | |
40926 | |
40927 | |
40928 | |
40929 | if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() && |
40930 | !Subtarget.hasDQI() && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
40931 | N0.getOperand(0).getValueType() == MVT::v16i1 && |
40932 | isNullConstant(N0.getOperand(1))) |
40933 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, |
40934 | DAG.getBitcast(MVT::i16, N0.getOperand(0))); |
40935 | |
40936 | |
40937 | |
40938 | |
40939 | if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() && |
40940 | VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) { |
40941 | auto *BCast = cast<MemIntrinsicSDNode>(N0); |
40942 | unsigned SrcVTSize = SrcVT.getScalarSizeInBits(); |
40943 | unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits(); |
40944 | |
40945 | if (MemSize >= 32) { |
40946 | MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize) |
40947 | : MVT::getIntegerVT(MemSize); |
40948 | MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize) |
40949 | : MVT::getIntegerVT(SrcVTSize); |
40950 | LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements()); |
40951 | |
40952 | SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other); |
40953 | SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() }; |
40954 | SDValue ResNode = |
40955 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, |
40956 | MemVT, BCast->getMemOperand()); |
40957 | DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1)); |
40958 | return DAG.getBitcast(VT, ResNode); |
40959 | } |
40960 | } |
40961 | |
40962 | |
40963 | |
40964 | |
40965 | if (VT == MVT::x86mmx) { |
40966 | |
40967 | APInt UndefElts; |
40968 | SmallVector<APInt, 1> EltBits; |
40969 | if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) { |
40970 | SDLoc DL(N0); |
40971 | |
40972 | if (EltBits[0].countLeadingZeros() >= 32) |
40973 | return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, |
40974 | DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32)); |
40975 | |
40976 | |
40977 | APFloat F64(APFloat::IEEEdouble(), EltBits[0]); |
40978 | return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64)); |
40979 | } |
40980 | |
40981 | |
40982 | if (N0.getOpcode() == ISD::BUILD_VECTOR && |
40983 | (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) && |
40984 | N0.getOperand(0).getValueType() == SrcVT.getScalarType()) { |
40985 | bool LowUndef = true, AllUndefOrZero = true; |
40986 | for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) { |
40987 | SDValue Op = N0.getOperand(i); |
40988 | LowUndef &= Op.isUndef() || (i >= e/2); |
40989 | AllUndefOrZero &= (Op.isUndef() || isNullConstant(Op)); |
40990 | } |
40991 | if (AllUndefOrZero) { |
40992 | SDValue N00 = N0.getOperand(0); |
40993 | SDLoc dl(N00); |
40994 | N00 = LowUndef ? DAG.getAnyExtOrTrunc(N00, dl, MVT::i32) |
40995 | : DAG.getZExtOrTrunc(N00, dl, MVT::i32); |
40996 | return DAG.getNode(X86ISD::MMX_MOVW2D, dl, VT, N00); |
40997 | } |
40998 | } |
40999 | |
41000 | |
41001 | |
41002 | |
41003 | if (N0.getOpcode() == ISD::BUILD_VECTOR && |
41004 | (SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || |
41005 | SrcVT == MVT::v8i8)) |
41006 | return createMMXBuildVector(cast<BuildVectorSDNode>(N0), DAG, Subtarget); |
41007 | |
41008 | |
41009 | if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT || |
41010 | N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) && |
41011 | isNullConstant(N0.getOperand(1))) { |
41012 | SDValue N00 = N0.getOperand(0); |
41013 | if (N00.getValueType().is128BitVector()) |
41014 | return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT, |
41015 | DAG.getBitcast(MVT::v2i64, N00)); |
41016 | } |
41017 | |
41018 | |
41019 | if (SrcVT == MVT::v2i32 && N0.getOpcode() == ISD::FP_TO_SINT) { |
41020 | SDLoc DL(N0); |
41021 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, |
41022 | DAG.getUNDEF(MVT::v2i32)); |
41023 | return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT, |
41024 | DAG.getBitcast(MVT::v2i64, Res)); |
41025 | } |
41026 | } |
41027 | |
41028 | |
41029 | |
41030 | if (Subtarget.hasAVX512() && VT.isScalarInteger() && |
41031 | SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && |
41032 | ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { |
41033 | return combinevXi1ConstantToInteger(N0, DAG); |
41034 | } |
41035 | |
41036 | if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && |
41037 | VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
41038 | isa<ConstantSDNode>(N0)) { |
41039 | auto *C = cast<ConstantSDNode>(N0); |
41040 | if (C->isAllOnesValue()) |
41041 | return DAG.getConstant(1, SDLoc(N0), VT); |
41042 | if (C->isNullValue()) |
41043 | return DAG.getConstant(0, SDLoc(N0), VT); |
41044 | } |
41045 | |
41046 | |
41047 | |
41048 | |
41049 | if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && |
41050 | VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
41051 | isPowerOf2_32(VT.getVectorNumElements())) { |
41052 | unsigned NumElts = VT.getVectorNumElements(); |
41053 | SDValue Src = N0; |
41054 | |
41055 | |
41056 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) |
41057 | Src = N0.getOperand(0); |
41058 | |
41059 | if (Src.getOpcode() == X86ISD::MOVMSK && Src.hasOneUse()) { |
41060 | SDValue MovmskIn = Src.getOperand(0); |
41061 | MVT MovmskVT = MovmskIn.getSimpleValueType(); |
41062 | unsigned MovMskElts = MovmskVT.getVectorNumElements(); |
41063 | |
41064 | |
41065 | |
41066 | if (MovMskElts <= NumElts && |
41067 | (Subtarget.hasBWI() || MovmskVT.getVectorElementType() != MVT::i8)) { |
41068 | EVT IntVT = EVT(MovmskVT).changeVectorElementTypeToInteger(); |
41069 | MovmskIn = DAG.getBitcast(IntVT, MovmskIn); |
41070 | SDLoc dl(N); |
41071 | MVT CmpVT = MVT::getVectorVT(MVT::i1, MovMskElts); |
41072 | SDValue Cmp = DAG.getSetCC(dl, CmpVT, MovmskIn, |
41073 | DAG.getConstant(0, dl, IntVT), ISD::SETLT); |
41074 | if (EVT(CmpVT) == VT) |
41075 | return Cmp; |
41076 | |
41077 | |
41078 | |
41079 | unsigned NumConcats = NumElts / MovMskElts; |
41080 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, CmpVT)); |
41081 | Ops[0] = Cmp; |
41082 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops); |
41083 | } |
41084 | } |
41085 | } |
41086 | |
41087 | |
41088 | |
41089 | if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget)) |
41090 | return V; |
41091 | |
41092 | |
41093 | |
41094 | |
41095 | |
41096 | |
41097 | unsigned FPOpcode; |
41098 | switch (N0.getOpcode()) { |
41099 | case ISD::AND: FPOpcode = X86ISD::FAND; break; |
41100 | case ISD::OR: FPOpcode = X86ISD::FOR; break; |
41101 | case ISD::XOR: FPOpcode = X86ISD::FXOR; break; |
41102 | default: return SDValue(); |
41103 | } |
41104 | |
41105 | |
41106 | if (!((Subtarget.hasSSE1() && VT == MVT::f32) || |
41107 | (Subtarget.hasSSE2() && VT == MVT::f64) || |
41108 | (Subtarget.hasFP16() && VT == MVT::f16) || |
41109 | (Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() && |
41110 | TLI.isTypeLegal(VT)))) |
41111 | return SDValue(); |
41112 | |
41113 | SDValue LogicOp0 = N0.getOperand(0); |
41114 | SDValue LogicOp1 = N0.getOperand(1); |
41115 | SDLoc DL0(N0); |
41116 | |
41117 | |
41118 | if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST && |
41119 | LogicOp0.hasOneUse() && LogicOp0.getOperand(0).hasOneUse() && |
41120 | LogicOp0.getOperand(0).getValueType() == VT && |
41121 | !isa<ConstantSDNode>(LogicOp0.getOperand(0))) { |
41122 | SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1); |
41123 | unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); |
41124 | return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1); |
41125 | } |
41126 | |
41127 | if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST && |
41128 | LogicOp1.hasOneUse() && LogicOp1.getOperand(0).hasOneUse() && |
41129 | LogicOp1.getOperand(0).getValueType() == VT && |
41130 | !isa<ConstantSDNode>(LogicOp1.getOperand(0))) { |
41131 | SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0); |
41132 | unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); |
41133 | return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0); |
41134 | } |
41135 | |
41136 | return SDValue(); |
41137 | } |
41138 | |
41139 | |
41140 | |
41141 | |
41142 | static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) { |
41143 | SDValue AbsOp1 = Abs->getOperand(0); |
41144 | if (AbsOp1.getOpcode() != ISD::SUB) |
41145 | return false; |
41146 | |
41147 | Op0 = AbsOp1.getOperand(0); |
41148 | Op1 = AbsOp1.getOperand(1); |
41149 | |
41150 | |
41151 | if (Op0.getOpcode() != ISD::ZERO_EXTEND || |
41152 | Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 || |
41153 | Op1.getOpcode() != ISD::ZERO_EXTEND || |
41154 | Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8) |
41155 | return false; |
41156 | |
41157 | return true; |
41158 | } |
41159 | |
41160 | |
41161 | |
41162 | static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0, |
41163 | const SDValue &Zext1, const SDLoc &DL, |
41164 | const X86Subtarget &Subtarget) { |
41165 | |
41166 | EVT InVT = Zext0.getOperand(0).getValueType(); |
41167 | unsigned RegSize = std::max(128u, (unsigned)InVT.getSizeInBits()); |
41168 | |
41169 | |
41170 | |
41171 | unsigned NumConcat = RegSize / InVT.getSizeInBits(); |
41172 | SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT)); |
41173 | Ops[0] = Zext0.getOperand(0); |
41174 | MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8); |
41175 | SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops); |
41176 | Ops[0] = Zext1.getOperand(0); |
41177 | SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops); |
41178 | |
41179 | |
41180 | auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
41181 | ArrayRef<SDValue> Ops) { |
41182 | MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64); |
41183 | return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops); |
41184 | }; |
41185 | MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64); |
41186 | return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 }, |
41187 | PSADBWBuilder); |
41188 | } |
41189 | |
41190 | |
41191 | |
// Attempt to replace a min/max v8i16/v16i8 horizontal reduction with
// PHMINPOSUW.
static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  // PHMINPOSUW is an SSE4.1 instruction.
  if (!Subtarget.hasSSE41())
    return SDValue();

  // Only handle i16/i8 element extractions.
  EVT ExtractVT = Extract->getValueType(0);
  if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
    return SDValue();

  // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
  ISD::NodeType BinOp;
  SDValue Src = DAG.matchBinOpReduction(
      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true);
  if (!Src)
    return SDValue();

  // The reduction source must have the same element type as the extract and
  // be a whole multiple of 128 bits wide so it can be split down to v8i16.
  EVT SrcVT = Src.getValueType();
  EVT SrcSVT = SrcVT.getScalarType();
  if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0)
    return SDValue();

  SDLoc DL(Extract);
  SDValue MinPos = Src;

  // First, reduce the source down to a 128-bit vector, applying BinOp to the
  // lo/hi halves at each step.
  while (SrcVT.getSizeInBits() > 128) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = splitVector(MinPos, DAG, DL);
    SrcVT = Lo.getValueType();
    MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
  }
  assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
          (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
         "Unexpected value type");

  // PHMINPOSUW computes UMIN(v8i16); for SMIN/SMAX/UMAX we XOR with a mask to
  // map the operation onto an unsigned minimum (the same XOR is applied again
  // afterwards to map the result back).
  SDValue Mask;
  unsigned MaskEltsBits = ExtractVT.getSizeInBits();
  if (BinOp == ISD::SMAX)
    Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT);
  else if (BinOp == ISD::SMIN)
    Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
  else if (BinOp == ISD::UMAX)
    Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT);

  if (Mask)
    MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);

  // For v16i8 cases we first perform a UMIN on pairs of byte elements,
  // shuffling each odd element down and inserting zeros in the upper byte of
  // each pair. The v16i8 UMIN leaves the upper byte of each i16 lane as zero,
  // effectively zero-extending each byte ready for the v8i16 PHMINPOSUW.
  if (ExtractVT == MVT::i8) {
    SDValue Upper = DAG.getVectorShuffle(
        SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
        {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
    MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
  }

  // Perform the PHMINPOS on a v8i16 vector.
  MinPos = DAG.getBitcast(MVT::v8i16, MinPos);
  MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos);
  MinPos = DAG.getBitcast(SrcVT, MinPos);

  // Undo the earlier min/max mapping (see Mask above).
  if (Mask)
    MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);

  // The reduced value lives in element 0.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos,
                     DAG.getIntPtrConstant(0, DL));
}
41264 | |
41265 | |
// Attempt to replace an all_of/any_of/parity style horizontal reduction with
// a MOVMSK.
static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  // Bail without SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  EVT ExtractVT = Extract->getValueType(0);
  unsigned BitWidth = ExtractVT.getSizeInBits();
  if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
      ExtractVT != MVT::i8 && ExtractVT != MVT::i1)
    return SDValue();

  // Check for OR(any_of)/AND(all_of) horizontal reduction patterns, plus
  // XOR(parity) for the i1 case.
  ISD::NodeType BinOp;
  SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
  if (!Match && ExtractVT == MVT::i1)
    Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::XOR});
  if (!Match)
    return SDValue();

  // EXTRACT_VECTOR_ELT can require implicit extension of the vector element,
  // which we can't support here for now.
  if (Match.getScalarValueSizeInBits() != BitWidth)
    return SDValue();

  SDValue Movmsk;
  SDLoc DL(Extract);
  EVT MatchVT = Match.getValueType();
  unsigned NumElts = MatchVT.getVectorNumElements();
  unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (ExtractVT == MVT::i1) {
    // Special case for (pre-legalization) vXi1 reductions.
    if (NumElts > 64 || !isPowerOf2_32(NumElts))
      return SDValue();
    if (TLI.isTypeLegal(MatchVT)) {
      // If this is a legal AVX512 predicate type we can just bitcast to a
      // scalar integer mask.
      EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      Movmsk = DAG.getBitcast(MovmskVT, Match);
    } else {
      // For all_of(setcc(x,0,eq)) on vXi64 without SSE41 (no PCMPEQQ), widen
      // the comparison to 2x as many i32 elements - the result is the same as
      // every i32 half of each i64 element must also compare equal to zero.
      if (BinOp == ISD::AND && !Subtarget.hasSSE41() &&
          Match.getOpcode() == ISD::SETCC &&
          ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
          cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
              ISD::CondCode::SETEQ) {
        SDValue Vec = Match.getOperand(0);
        if (Vec.getValueType().getScalarType() == MVT::i64 &&
            (2 * NumElts) <= MaxElts) {
          NumElts *= 2;
          EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
          MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
          Match = DAG.getSetCC(
              DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
              DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ);
        }
      }

      // Split the vector down to a MOVMSK-able width, then use
      // combineBitcastvxi1 to create the MOVMSK.
      while (NumElts > MaxElts) {
        SDValue Lo, Hi;
        std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
        Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
        NumElts /= 2;
      }
      EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget);
    }
    if (!Movmsk)
      return SDValue();
    Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32);
  } else {
    // FIXME: Better handling of k-registers or 512-bit vectors?
    unsigned MatchSizeInBits = Match.getValueSizeInBits();
    if (!(MatchSizeInBits == 128 ||
          (MatchSizeInBits == 256 && Subtarget.hasAVX())))
      return SDValue();

    // Make sure this isn't a vector of 1 element. The perf win from using
    // MOVMSK diminishes with fewer elements in the reduction, but it is
    // generally better to get the comparison over to the GPRs as soon as
    // possible to reduce the number of vector ops.
    if (Match.getValueType().getVectorNumElements() < 2)
      return SDValue();

    // Check that we are extracting a reduction of all sign bits.
    if (DAG.ComputeNumSignBits(Match) != BitWidth)
      return SDValue();

    // Without AVX2 integer ops, reduce sub-32-bit-element 256-bit vectors to
    // a single 128-bit op first.
    if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
      Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
      MatchSizeInBits = Match.getValueSizeInBits();
    }

    // For 32/64-bit elements, use MOVMSKPS/MOVMSKPD (via an FP-typed source
    // vector), otherwise use PMOVMSKB on bytes.
    MVT MaskSrcVT;
    if (64 == BitWidth || 32 == BitWidth)
      MaskSrcVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
                                   MatchSizeInBits / BitWidth);
    else
      MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);

    SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
    Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget);
    NumElts = MaskSrcVT.getVectorNumElements();
  }
  assert((NumElts <= 32 || NumElts == 64) &&
         "Not expecting more than 64 elements");

  MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32;
  if (BinOp == ISD::XOR) {
    // parity -> (PARITY(MOVMSK X))
    SDValue Result = DAG.getNode(ISD::PARITY, DL, CmpVT, Movmsk);
    return DAG.getZExtOrTrunc(Result, DL, ExtractVT);
  }

  SDValue CmpC;
  ISD::CondCode CondCode;
  if (BinOp == ISD::OR) {
    // any_of -> MOVMSK != 0
    CmpC = DAG.getConstant(0, DL, CmpVT);
    CondCode = ISD::CondCode::SETNE;
  } else {
    // all_of -> MOVMSK == ((1 << NumElts) - 1)
    CmpC = DAG.getConstant(APInt::getLowBitsSet(CmpVT.getSizeInBits(), NumElts),
                           DL, CmpVT);
    CondCode = ISD::CondCode::SETEQ;
  }

  // The setcc produces an i1 (extended to SetccVT); zero-extend to ExtractVT
  // and negate (0 - x) to produce the expected all-zeros/all-ones result.
  EVT SetccVT =
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
  SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
  SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
  return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
}
41408 | |
// Attempt to replace an add-of-abs-of-differences horizontal reduction
// (feeding an EXTRACT_VECTOR_ELT) with a PSADBW-based sequence.
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  // PSADBW is only supported on SSE2 and up.
  if (!Subtarget.hasSSE2())
    return SDValue();

  EVT ExtractVT = Extract->getValueType(0);

  // Verify the type we're extracting is either i32 or i64.
  if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64)
    return SDValue();

  // The reduction shuffle pyramid only works for power-of-2 element counts.
  EVT VT = Extract->getOperand(0).getValueType();
  if (!isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  // Match shuffle + add pyramid.
  ISD::NodeType BinOp;
  SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});

  // The operand is expected to be zero extended from i8
  // (verified in detectZextAbsDiff).
  // In order to convert to i64 and above, an additional any/zero/sign
  // extend is expected.
  // The zero extend from 32 bit has no mathematical effect on the result.
  // Also the sign extend is basically zero extend
  // (extends the sign bit which is zero).
  // So it is correct to skip the sign/zero extend instruction.
  if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
               Root.getOpcode() == ISD::ZERO_EXTEND ||
               Root.getOpcode() == ISD::ANY_EXTEND))
    Root = Root.getOperand(0);

  // If there was a match, Root must be the ABS at the root of an
  // absolute-difference pattern.
  if (!Root || Root.getOpcode() != ISD::ABS)
    return SDValue();

  // Check whether we have an abs(sub(zext, zext)) pattern feeding the ABS.
  SDValue Zext0, Zext1;
  if (!detectZextAbsDiff(Root, Zext0, Zext1))
    return SDValue();

  // Create the SAD instruction.
  SDLoc DL(Extract);
  SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL, Subtarget);

  // If the original vector was wider than 8 elements, sum over the partial
  // results in the SAD vector with a shuffle+add tree (PSADBW already reduces
  // groups of 8 bytes, hence Stages - 3).
  unsigned Stages = Log2_32(VT.getVectorNumElements());
  EVT SadVT = SAD.getValueType();
  if (Stages > 3) {
    unsigned SadElems = SadVT.getVectorNumElements();

    for(unsigned i = Stages - 3; i > 0; --i) {
      // Shuffle the upper half of the remaining elements down onto the lower
      // half, then add.
      SmallVector<int, 16> Mask(SadElems, -1);
      for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
        Mask[j] = MaskEnd + j;

      SDValue Shuffle =
          DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask);
      SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle);
    }
  }

  unsigned ExtractSizeInBits = ExtractVT.getSizeInBits();
  // Return the lowest ExtractSizeInBits bits of the result: bitcast to a
  // vector of ExtractVT elements and extract using the original index.
  EVT ResVT = EVT::getVectorVT(*DAG.getContext(), ExtractVT,
                               SadVT.getSizeInBits() / ExtractSizeInBits);
  SAD = DAG.getBitcast(ResVT, SAD);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, SAD,
                     Extract->getOperand(1));
}
41482 | |
41483 | |
41484 | |
// Attempt to peek through a target shuffle (or other recognizable source
// pattern) and extract the referenced scalar directly from the shuffle's
// source operand instead.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const X86Subtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDLoc dl(N);
  SDValue Src = N->getOperand(0);
  SDValue Idx = N->getOperand(1);

  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();
  EVT SrcSVT = SrcVT.getVectorElementType();
  unsigned SrcEltBits = SrcSVT.getSizeInBits();
  unsigned NumSrcElts = SrcVT.getVectorNumElements();

  // Don't attempt this for boolean mask vectors or unknown extraction indices.
  if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
    return SDValue();

  const APInt &IdxC = N->getConstantOperandAPInt(1);
  if (IdxC.uge(NumSrcElts))
    return SDValue();

  SDValue SrcBC = peekThroughBitcasts(Src);

  // Handle extract(bitcast(broadcast(scalar_value))) - return the (possibly
  // truncated/extended) broadcast scalar directly.
  if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
    SDValue SrcOp = SrcBC.getOperand(0);
    EVT SrcOpVT = SrcOp.getValueType();
    if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
        (SrcOpVT.getSizeInBits() % SrcEltBits) == 0) {
      unsigned Scale = SrcOpVT.getSizeInBits() / SrcEltBits;
      unsigned Offset = IdxC.urem(Scale) * SrcEltBits;
      // TODO support non-zero offsets.
      if (Offset == 0) {
        SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
        SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
        return SrcOp;
      }
    }
  }

  // If we're extracting a single element from a broadcast load and there are
  // no other users, just create a single scalar load instead.
  if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
    unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
    if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
        VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
      SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
                                 MemIntr->getBasePtr(),
                                 MemIntr->getPointerInfo(),
                                 MemIntr->getOriginalAlign(),
                                 MemIntr->getMemOperand()->getFlags());
      // Preserve users of the broadcast-load's chain result.
      DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
      return Load;
    }
  }

  // Handle extract(bitcast(scalar_to_vector(scalar_value))) for integers:
  // shift the requested sub-element down and truncate/extend to VT.
  // TODO: Move to DAGCombine?
  if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
      SrcBC.getValueType().isInteger() &&
      (SrcBC.getScalarValueSizeInBits() % SrcEltBits) == 0 &&
      SrcBC.getScalarValueSizeInBits() ==
          SrcBC.getOperand(0).getValueSizeInBits()) {
    unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcEltBits;
    if (IdxC.ult(Scale)) {
      unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
      SDValue Scl = SrcBC.getOperand(0);
      EVT SclVT = Scl.getValueType();
      if (Offset) {
        Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
                          DAG.getShiftAmountConstant(Offset, SclVT, dl));
      }
      Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
      Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
      return Scl;
    }
  }

  // Handle extract(truncate(x)) for the 0'th index: extract directly from the
  // lowest 128-bit subvector of the pre-truncate value.
  // TODO: Treat this as a faux shuffle?
  // TODO: When can we use this for general indices?
  if (ISD::TRUNCATE == Src.getOpcode() && IdxC == 0 &&
      (SrcVT.getSizeInBits() % 128) == 0) {
    Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
    MVT ExtractVT = MVT::getVectorVT(SrcSVT.getSimpleVT(), 128 / SrcEltBits);
    return DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(ExtractVT, Src),
                       Idx);
  }

  // Helper: perform a legal scalar extraction from a (possibly wide) vector,
  // depending on SSE level. Returns SDValue() if no cheap extract exists.
  auto GetLegalExtract = [&Subtarget, &DAG, &dl](SDValue Vec, EVT VecVT,
                                                 unsigned Idx) {
    EVT VecSVT = VecVT.getScalarType();
    // Reduce 256/512-bit integer vectors to the 128-bit lane holding Idx.
    if ((VecVT.is256BitVector() || VecVT.is512BitVector()) &&
        (VecSVT == MVT::i8 || VecSVT == MVT::i16 || VecSVT == MVT::i32 ||
         VecSVT == MVT::i64)) {
      unsigned EltSizeInBits = VecSVT.getSizeInBits();
      unsigned NumEltsPerLane = 128 / EltSizeInBits;
      unsigned LaneOffset = (Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits;
      unsigned LaneIdx = LaneOffset / Vec.getScalarValueSizeInBits();
      VecVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumEltsPerLane);
      Vec = extract128BitVector(Vec, LaneIdx, DAG, dl);
      Idx &= (NumEltsPerLane - 1);
    }
    // i32/i64 element 0 is free with SSE2 (MOVD/MOVQ); any index with SSE41
    // (PEXTRD/PEXTRQ).
    if ((VecVT == MVT::v4i32 || VecVT == MVT::v2i64) &&
        ((Idx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VecVT.getScalarType(),
                         DAG.getBitcast(VecVT, Vec),
                         DAG.getIntPtrConstant(Idx, dl));
    }
    // i16 with SSE2 (PEXTRW) or i8 with SSE41 (PEXTRB).
    if ((VecVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
        (VecVT == MVT::v16i8 && Subtarget.hasSSE41())) {
      unsigned OpCode = (VecVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
      return DAG.getNode(OpCode, dl, MVT::i32, DAG.getBitcast(VecVT, Vec),
                         DAG.getTargetConstant(Idx, dl, MVT::i8));
    }
    return SDValue();
  };

  // Resolve the target shuffle inputs and mask.
  SmallVector<int, 16> Mask;
  SmallVector<SDValue, 2> Ops;
  if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
    return SDValue();

  // Shuffle inputs must be the same size as the result.
  if (llvm::any_of(Ops, [SrcVT](SDValue Op) {
        return SrcVT.getSizeInBits() != Op.getValueSizeInBits();
      }))
    return SDValue();

  // Attempt to narrow/widen the shuffle mask to the correct element count.
  if (Mask.size() != NumSrcElts) {
    if ((NumSrcElts % Mask.size()) == 0) {
      SmallVector<int, 16> ScaledMask;
      int Scale = NumSrcElts / Mask.size();
      narrowShuffleMaskElts(Scale, Mask, ScaledMask);
      Mask = std::move(ScaledMask);
    } else if ((Mask.size() % NumSrcElts) == 0) {
      // Simplify Mask based on the single demanded element before widening.
      int ExtractIdx = (int)IdxC.getZExtValue();
      int Scale = Mask.size() / NumSrcElts;
      int Lo = Scale * ExtractIdx;
      int Hi = Scale * (ExtractIdx + 1);
      for (int i = 0, e = (int)Mask.size(); i != e; ++i)
        if (i < Lo || Hi <= i)
          Mask[i] = SM_SentinelUndef;

      SmallVector<int, 16> WidenedMask;
      while (Mask.size() > NumSrcElts &&
             canWidenShuffleElements(Mask, WidenedMask))
        Mask = std::move(WidenedMask);
    }
  }

  // Determine the extraction index/type. If widening failed, see if we can
  // still extract the (narrower) element and zero-extend it, which is valid
  // when the adjacent mask elements are undef/zero.
  int ExtractIdx;
  EVT ExtractVT;
  if (Mask.size() == NumSrcElts) {
    ExtractIdx = Mask[IdxC.getZExtValue()];
    ExtractVT = SrcVT;
  } else {
    unsigned Scale = Mask.size() / NumSrcElts;
    if ((Mask.size() % NumSrcElts) != 0 || SrcVT.isFloatingPoint())
      return SDValue();
    unsigned ScaledIdx = Scale * IdxC.getZExtValue();
    if (!isUndefOrZeroInRange(Mask, ScaledIdx + 1, Scale - 1))
      return SDValue();
    ExtractIdx = Mask[ScaledIdx];
    EVT ExtractSVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltBits / Scale);
    ExtractVT = EVT::getVectorVT(*DAG.getContext(), ExtractSVT, Mask.size());
    assert(SrcVT.getSizeInBits() == ExtractVT.getSizeInBits() &&
           "Failed to widen vector type");
  }

  // If the shuffle source element is undef/zero then we can just accept it.
  if (ExtractIdx == SM_SentinelUndef)
    return DAG.getUNDEF(VT);

  if (ExtractIdx == SM_SentinelZero)
    return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)
                                : DAG.getConstant(0, dl, VT);

  // Mask indices >= Mask.size() reference the second shuffle input.
  SDValue SrcOp = Ops[ExtractIdx / Mask.size()];
  ExtractIdx = ExtractIdx % Mask.size();
  if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx))
    return DAG.getZExtOrTrunc(V, dl, VT);

  return SDValue();
}
41681 | |
41682 | |
41683 | |
/// Extracting a scalar FP value from vector element 0 is free, so extract each
/// operand first, then perform the math as a scalar op.
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
  SDValue Vec = ExtElt->getOperand(0);
  SDValue Index = ExtElt->getOperand(1);
  EVT VT = ExtElt->getValueType(0);
  EVT VecVT = Vec.getValueType();

  // TODO: If this is a unary/expensive/expand op, allow extraction from a
  // non-zero element because the shuffle+scalar op will be cheaper?
  if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT)
    return SDValue();

  // Vector FP compares don't fit the pattern of FP math ops (propagate the
  // extract through both compare operands, keeping the condition code), so
  // deal with those as a special case.
  // extract (setcc X, Y, CC), 0 --> setcc (extract X, 0), (extract Y, 0), CC
  if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) {
    EVT OpVT = Vec.getOperand(0).getValueType().getScalarType();
    if (OpVT != MVT::f32 && OpVT != MVT::f64)
      return SDValue();

    // Extract element 0 of each compare operand and re-create the setcc.
    SDLoc DL(ExtElt);
    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
                               Vec.getOperand(0), Index);
    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
                               Vec.getOperand(1), Index);
    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));
  }

  // Only f16 (with FP16 support), f32 and f64 are handled below.
  if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 &&
      VT != MVT::f64)
    return SDValue();

  // Vector FP selects don't fit the pattern of FP math ops (the condition has
  // a different type and we have to change the opcode), so deal with those
  // here too.
  // extract (vselect (setcc), X, Y), 0 -->
  //   select (extract (setcc), 0), (extract X, 0), (extract Y, 0)
  // NOTE(review): requires the setcc to produce i1 elements so the scalar
  // condition is already a bool - presumably restricting this to
  // pre-type-legalization; confirm against callers.
  if (Vec.getOpcode() == ISD::VSELECT &&
      Vec.getOperand(0).getOpcode() == ISD::SETCC &&
      Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
      Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
    // Extract the condition, true and false values, and build a scalar select.
    SDLoc DL(ExtElt);
    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                               Vec.getOperand(0).getValueType().getScalarType(),
                               Vec.getOperand(0), Index);
    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
                               Vec.getOperand(1), Index);
    SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
                               Vec.getOperand(2), Index);
    return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
  }

  // TODO: This switch could include FNEG and the x86-specific FP logic ops
  // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
  // missed load folding and fma+fneg combining.
  switch (Vec.getOpcode()) {
  case ISD::FMA:
  case ISD::FMAD:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCOPYSIGN:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUM:
  case X86ISD::FMAX:
  case X86ISD::FMIN:
  case ISD::FABS:
  case ISD::FSQRT:
  case ISD::FRINT:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case X86ISD::FRCP:
  case X86ISD::FRSQRT: {
    // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
    SDLoc DL(ExtElt);
    SmallVector<SDValue, 4> ExtOps;
    for (SDValue Op : Vec->ops())
      ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));
    return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);
  }
  default:
    return SDValue();
  }
  llvm_unreachable("All opcodes should return within switch");
}
41781 | |
41782 | |
41783 | |
/// Try to optimize a vector reduction (a tree of ADD/MUL/FADD binops ending in
/// an extract of element 0) into a shorter sequence using unpacks, PSADBW, or
/// horizontal add ops.
/// \p ExtElt is the EXTRACT_VECTOR_ELT at the root of the reduction.
/// Returns the replacement value, or an empty SDValue if no fold applies.
static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");

  // All of the lowerings below use SSE2 shuffles/ops.
  if (!Subtarget.hasSSE2())
    return SDValue();

  ISD::NodeType Opc;
  SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc,
                                        {ISD::ADD, ISD::MUL, ISD::FADD}, true);
  if (!Rdx)
    return SDValue();

  SDValue Index = ExtElt->getOperand(1);
  assert(isNullConstant(Index) &&
         "Reduction doesn't end in an extract from index 0");

  EVT VT = ExtElt->getValueType(0);
  EVT VecVT = Rdx.getValueType();
  // Only handle the case where the scalar result type matches the vector
  // element type (no implicit extension/truncation of the reduced value).
  if (VecVT.getScalarType() != VT)
    return SDValue();

  SDLoc DL(ExtElt);

  // vXi8 MUL reduction - there is no byte multiply, so promote to a vXi16
  // multiply reduction and shrink it with shuffles.
  if (Opc == ISD::MUL) {
    unsigned NumElts = VecVT.getVectorNumElements();
    if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
      return SDValue();
    if (VecVT.getSizeInBits() >= 128) {
      // Interleave lo/hi byte halves into i16 lanes and multiply them
      // together, then keep halving until we are down to 128 bits.
      EVT WideVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts / 2);
      SDValue Lo = getUnpackl(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
      SDValue Hi = getUnpackh(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
      Lo = DAG.getBitcast(WideVT, Lo);
      Hi = DAG.getBitcast(WideVT, Hi);
      Rdx = DAG.getNode(Opc, DL, WideVT, Lo, Hi);
      while (Rdx.getValueSizeInBits() > 128) {
        std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
        Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
      }
    } else {
      // Sub-128-bit source: pad out to v16i8, then widen bytes to v8i16 via
      // an unpack-with-undef (only the low bytes of each i16 matter for the
      // final i8 result).
      if (VecVT == MVT::v4i8)
        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
                          DAG.getUNDEF(MVT::v4i8));
      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
                        DAG.getUNDEF(MVT::v8i8));
      Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
      Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
    }
    // Log2 shuffle+multiply steps fold the v8i16 vector down to element 0.
    if (NumElts >= 8)
      Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                        DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                             {4, 5, 6, 7, -1, -1, -1, -1}));
    Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                      DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                           {2, 3, -1, -1, -1, -1, -1, -1}));
    Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                      DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                           {1, -1, -1, -1, -1, -1, -1, -1}));
    // Reinterpret as bytes; element 0 holds the (truncated) product.
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // vXi8 ADD reduction of a narrow (v4i8/v8i8) vector: widen to v16i8 with
  // zeros/undef and use PSADBW against zero to sum all the bytes at once.
  if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
    if (VecVT == MVT::v4i8) {
      // Zero the upper bytes so they don't contribute to the SAD sum.
      if (Subtarget.hasSSE41()) {
        Rdx = DAG.getBitcast(MVT::i32, Rdx);
        Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
                          DAG.getConstant(0, DL, MVT::v4i32), Rdx,
                          DAG.getIntPtrConstant(0, DL));
        Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
      } else {
        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
                          DAG.getUNDEF(MVT::v8i8));
      }
    }
    if (Rdx.getValueType() == MVT::v8i8) {
      // PSADBW sums each 8-byte half independently, so the upper half may
      // stay undef.
      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
                        DAG.getUNDEF(MVT::v8i8));
    }
    Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
                      DAG.getConstant(0, DL, MVT::v16i8));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // The remaining paths require a power-of-2 number of elements in a
  // 128-bit-multiple vector.
  if ((VecVT.getSizeInBits() % 128) != 0 ||
      !isPowerOf2_32(VecVT.getVectorNumElements()))
    return SDValue();

  // vXi8 ADD reduction: narrow to 128 bits by adding halves, fold the high
  // 8 bytes into the low 8, then let PSADBW-vs-zero finish the sum.
  if (VT == MVT::i8) {
    while (Rdx.getValueSizeInBits() > 128) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
      VecVT = Lo.getValueType();
      Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi);
    }
    assert(VecVT == MVT::v16i8 && "v16i8 reduction expected");

    SDValue Hi = DAG.getVectorShuffle(
        MVT::v16i8, DL, Rdx, Rdx,
        {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
    Rdx = DAG.getNode(ISD::ADD, DL, MVT::v16i8, Rdx, Hi);
    Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
                      getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // Only use (F)HADD-based reductions if the target prefers horizontal ops.
  if (!shouldUseHorizontalOp(true, DAG, Subtarget))
    return SDValue();

  unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;

  // 256-bit horizontal instructions operate on 128-bit chunks rather than
  // across an entire 256-bit vector, so extract + hadd the halves first.
  // HADD needs SSSE3; FHADD needs SSE3.
  if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) ||
      ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) {
    unsigned NumElts = VecVT.getVectorNumElements();
    SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
    SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
    Rdx = DAG.getNode(HorizOpcode, DL, Lo.getValueType(), Hi, Lo);
    VecVT = Rdx.getValueType();
  }
  if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
      !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
    return SDValue();

  // Each horizontal op halves the number of partial sums, so log2(NumElts)
  // steps collapse the whole vector into element 0.
  unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
  for (unsigned i = 0; i != ReductionSteps; ++i)
    Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
41928 | |
41929 | |
41930 | |
41931 | |
41932 | |
/// DAG-combine for EXTRACT_VECTOR_ELT (and the X86 pextr-style nodes that
/// share this path): constant-fold extracts, peephole MMX bitcasts, and
/// recognize the various reduction idioms rooted at an extract of lane 0.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const X86Subtarget &Subtarget) {
  if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
    return NewOp;

  SDValue InputVector = N->getOperand(0);
  SDValue EltIdx = N->getOperand(1);
  auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx);

  EVT SrcVT = InputVector.getValueType();
  EVT VT = N->getValueType(0);
  SDLoc dl(InputVector);
  // Anything other than the generic extract node (presumably
  // X86ISD::PEXTRB/PEXTRW - see the PINSR fold below) zero-defines
  // out-of-range lanes instead of producing undef.
  bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
  unsigned NumSrcElts = SrcVT.getVectorNumElements();

  // Out-of-range constant index: 0 for pextr-style nodes, undef otherwise.
  if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
    return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);

  // Integer constant folding: if the source's per-element bits are known,
  // materialize the selected element directly.
  if (CIdx && VT.isInteger()) {
    APInt UndefVecElts;
    SmallVector<APInt, 16> EltBits;
    unsigned VecEltBitWidth = SrcVT.getScalarSizeInBits();
    if (getTargetConstantBitsFromNode(InputVector, VecEltBitWidth, UndefVecElts,
                                      EltBits, true, false)) {
      uint64_t Idx = CIdx->getZExtValue();
      if (UndefVecElts[Idx])
        return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
      return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()),
                             dl, VT);
    }
  }

  if (IsPextr) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    // Try to shrink the source based on which result bits are demanded.
    if (TLI.SimplifyDemandedBits(
            SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
      return SDValue(N, 0);

    // PEXTR*(PINSR*(v, s, c), c) -> s: extracting the lane that was just
    // inserted yields the inserted scalar (truncated then zero-extended).
    if ((InputVector.getOpcode() == X86ISD::PINSRB ||
         InputVector.getOpcode() == X86ISD::PINSRW) &&
        InputVector.getOperand(2) == EltIdx) {
      assert(SrcVT == InputVector.getOperand(0).getValueType() &&
             "Vector type mismatch");
      SDValue Scl = InputVector.getOperand(1);
      Scl = DAG.getNode(ISD::TRUNCATE, dl, SrcVT.getScalarType(), Scl);
      return DAG.getZExtOrTrunc(Scl, dl, VT);
    }

    // No further combines apply to the pextr-style nodes.
    return SDValue();
  }

  // extract i64 lane 0 of (v1i64 bitcast of an MMX value) -> bitcast.
  if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
      VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
    SDValue MMXSrc = InputVector.getOperand(0);

    if (MMXSrc.getValueType() == MVT::x86mmx)
      return DAG.getBitcast(VT, InputVector);
  }

  // extract i32 lane 0 of (v2i32 bitcast of an MMX value) -> MMX_MOVD2W.
  if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
      VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
    SDValue MMXSrc = InputVector.getOperand(0);

    if (MMXSrc.getValueType() == MVT::x86mmx)
      return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
  }

  // Reduction idioms that end in an extract: try SAD, predicate (all/any),
  // min/max, and plain arithmetic reductions, then FP scalarization.
  if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
    return SAD;

  if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget))
    return Cmp;

  if (SDValue MinMax = combineMinMaxReduction(N, DAG, Subtarget))
    return MinMax;

  if (SDValue V = combineArithReduction(N, DAG, Subtarget))
    return V;

  if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
    return V;

  // If *every* use of a vXi1 source is an i1 extract at a constant index,
  // convert the mask to a scalar integer once (combineBitcastvxi1) and turn
  // each extract into a single-bit test of that integer.
  if (CIdx && SrcVT.getScalarType() == MVT::i1) {
    SmallVector<SDNode *, 16> BoolExtracts;
    unsigned ResNo = InputVector.getResNo();
    auto IsBoolExtract = [&BoolExtracts, &ResNo](SDNode *Use) {
      if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          isa<ConstantSDNode>(Use->getOperand(1)) &&
          Use->getOperand(0).getResNo() == ResNo &&
          Use->getValueType(0) == MVT::i1) {
        BoolExtracts.push_back(Use);
        return true;
      }
      return false;
    };
    // Only worthwhile when the scalar conversion is amortized over more
    // than one extract.
    if (all_of(InputVector->uses(), IsBoolExtract) &&
        BoolExtracts.size() > 1) {
      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
      if (SDValue BC =
              combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
        for (SDNode *Use : BoolExtracts) {
          // extractelement vXi1 X, MaskIdx --> ((X & Mask) == Mask)
          unsigned MaskIdx = Use->getConstantOperandVal(1);
          APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
          SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
          SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
          Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
          DCI.CombineTo(Use, Res);
        }
        // N itself was rewritten via CombineTo; signal "handled".
        return SDValue(N, 0);
      }
    }
  }

  return SDValue();
}
42075 | |
42076 | |
42077 | |
42078 | |
/// Fold a VSELECT whose arms are all-ones and/or all-zeros build-vectors into
/// logic operations (bitcast / OR / AND / ANDN) on the condition mask, after
/// canonicalizing the condition via setcc inversion where profitable.
static SDValue
combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = LHS.getValueType();
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  assert(CondVT.isVector() && "Vector select expects a vector selector!");

  bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

  // vselect Cond, 0, 0 -> 0: both arms are zero so the condition is moot.
  if (TValIsAllZeros && FValIsAllZeros) {
    if (VT.isFloatingPoint())
      return DAG.getConstantFP(0.0, DL, VT);
    return DAG.getConstant(0, DL, VT);
  }

  // The logic-op folds below bitcast between CondVT and VT, which only
  // works when the element widths match.
  if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  // Canonicalize toward "all-ones in the true arm / all-zeros in the false
  // arm" by inverting a single-use setcc condition and swapping the arms.
  bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
  if (!TValIsAllOnes && !FValIsAllZeros && Cond.hasOneUse() &&
      // Only invertible conditions (setcc) qualify...
      Cond.getOpcode() == ISD::SETCC &&
      // ...and only when CondVT is what setcc would legally produce for VT.
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
          CondVT) {
    bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());

    if (TValIsAllZeros || FValIsAllOnes) {
      SDValue CC = Cond.getOperand(2);
      ISD::CondCode NewCC = ISD::getSetCCInverse(
          cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
      Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
                          NewCC);
      std::swap(LHS, RHS);
      // Keep the arm-classification flags in sync with the swap.
      TValIsAllOnes = FValIsAllOnes;
      FValIsAllZeros = TValIsAllZeros;
    }
  }

  // All remaining folds treat the condition as a per-lane 0/-1 bitmask, so
  // each lane must be fully sign-splatted.
  if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
    return SDValue();

  // vselect Cond, -1, 0 -> Cond (reinterpreted as the result type).
  if (TValIsAllOnes && FValIsAllZeros)
    return DAG.getBitcast(VT, Cond);

  if (!TLI.isTypeLegal(CondVT))
    return SDValue();

  // vselect Cond, -1, X -> Cond | X
  if (TValIsAllOnes) {
    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
    SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
    return DAG.getBitcast(VT, Or);
  }

  // vselect Cond, X, 0 -> Cond & X
  if (FValIsAllZeros) {
    SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
    SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
    return DAG.getBitcast(VT, And);
  }

  // vselect Cond, 0, X -> ~Cond & X
  if (TValIsAllZeros) {
    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
    SDValue AndN;
    // i1 masks have no ANDNP form; build NOT+AND instead.
    if (CondVT.getScalarType() == MVT::i1)
      AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
                         CastRHS);
    else
      AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
    return DAG.getBitcast(VT, AndN);
  }

  return SDValue();
}
42181 | |
42182 | |
42183 | |
42184 | |
42185 | |
42186 | static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, |
42187 | const X86Subtarget &Subtarget) { |
42188 | unsigned Opcode = N->getOpcode(); |
42189 | if (Opcode != X86ISD::BLENDV && Opcode != ISD::VSELECT) |
42190 | return SDValue(); |
42191 | |
42192 | |
42193 | EVT VT = N->getValueType(0); |
42194 | if (!VT.is256BitVector()) |
42195 | return SDValue(); |
42196 | |
42197 | |
42198 | SDValue Cond = N->getOperand(0); |
42199 | SDValue TVal = N->getOperand(1); |
42200 | SDValue FVal = N->getOperand(2); |
42201 | SmallVector<SDValue, 4> CatOpsT, CatOpsF; |
42202 | if (!TVal.hasOneUse() || !FVal.hasOneUse() || |
42203 | !collectConcatOps(TVal.getNode(), CatOpsT) || |
42204 | !collectConcatOps(FVal.getNode(), CatOpsF)) |
42205 | return SDValue(); |
42206 | |
42207 | auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL, |
42208 | ArrayRef<SDValue> Ops) { |
42209 | return DAG.getNode(Opcode, DL, Ops[1].getValueType(), Ops); |
42210 | }; |
42211 | return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { Cond, TVal, FVal }, |
42212 | makeBlend, false); |
42213 | } |
42214 | |
42215 | static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { |
42216 | SDValue Cond = N->getOperand(0); |
42217 | SDValue LHS = N->getOperand(1); |
42218 | SDValue RHS = N->getOperand(2); |
42219 | SDLoc DL(N); |
42220 | |
42221 | auto *TrueC = dyn_cast<ConstantSDNode>(LHS); |
42222 | auto *FalseC = dyn_cast<ConstantSDNode>(RHS); |
42223 | if (!TrueC || !FalseC) |
42224 | return SDValue(); |
42225 | |
42226 | |
42227 | EVT VT = N->getValueType(0); |
42228 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
42229 | return SDValue(); |
42230 | |
42231 | |
42232 | |
42233 | |
42234 | if (Cond.getValueType() != MVT::i1) |
42235 | return SDValue(); |
42236 | |
42237 | |
42238 | |
42239 | |
42240 | |
42241 | const APInt &TrueVal = TrueC->getAPIntValue(); |
42242 | const APInt &FalseVal = FalseC->getAPIntValue(); |
42243 | bool OV; |
42244 | APInt Diff = TrueVal.ssub_ov(FalseVal, OV); |
42245 | if (OV) |
42246 | return SDValue(); |
42247 | |
42248 | APInt AbsDiff = Diff.abs(); |
42249 | if (AbsDiff.isPowerOf2() || |
42250 | ((VT == MVT::i32 || VT == MVT::i64) && |
42251 | (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) { |
42252 | |
42253 | |
42254 | |
42255 | |
42256 | if (TrueVal.slt(FalseVal)) { |
42257 | Cond = DAG.getNOT(DL, Cond, MVT::i1); |
42258 | std::swap(TrueC, FalseC); |
42259 | } |
42260 | |
42261 | |
42262 | SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); |
42263 | |
42264 | |
42265 | if (!AbsDiff.isOneValue()) |
42266 | R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT)); |
42267 | |
42268 | |
42269 | if (!FalseC->isNullValue()) |
42270 | R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0)); |
42271 | |
42272 | return R; |
42273 | } |
42274 | |
42275 | return SDValue(); |
42276 | } |
42277 | |
42278 | |
42279 | |
42280 | |
42281 | |
42282 | |
/// Since BLENDV only reads the sign bit of each condition element, try to
/// simplify the condition by demanding only sign bits, and convert
/// qualifying VSELECT users of the condition into X86ISD::BLENDV.
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  // Skip non-select nodes and constant conditions (those are better served
  // by other folds, e.g. conversion to a shuffle).
  if ((N->getOpcode() != ISD::VSELECT &&
       N->getOpcode() != X86ISD::BLENDV) ||
      ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
    return SDValue();

  // Condition elements must be 8..64 bits wide.
  unsigned BitWidth = Cond.getScalarValueSizeInBits();
  if (BitWidth < 8 || BitWidth > 64)
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  // Exclude i16-element vectors (presumably no suitable blend form for
  // them on this path - confirm against lowering).
  if (VT.getVectorElementType() == MVT::i16)
    return SDValue();

  // 128-bit variable blends require SSE4.1.
  if (VT.is128BitVector() && !Subtarget.hasSSE41())
    return SDValue();

  // 256-bit byte blends require AVX2.
  if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
    return SDValue();

  // 512-bit vectors are excluded (AVX-512 selects use mask registers).
  if (VT.is512BitVector())
    return SDValue();

  // True iff every use of Cond is as the condition operand of a
  // vselect/BLENDV - then only its sign bits matter anywhere.
  auto OnlyUsedAsSelectCond = [](SDValue Cond) {
    for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
         UI != UE; ++UI)
      if ((UI->getOpcode() != ISD::VSELECT &&
           UI->getOpcode() != X86ISD::BLENDV) ||
          UI.getOperandNo() != 0)
        return false;

    return true;
  };

  // BLENDV semantics: only the sign bit of each lane is consulted.
  APInt DemandedBits(APInt::getSignMask(BitWidth));

  if (OnlyUsedAsSelectCond(Cond)) {
    KnownBits Known;
    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                          !DCI.isBeforeLegalizeOps());
    if (!TLI.SimplifyDemandedBits(Cond, DemandedBits, Known, TLO, 0, true))
      return SDValue();

    // The condition was simplified assuming only sign bits matter, so any
    // remaining VSELECT users must be rewritten to BLENDV (which has
    // exactly that semantics) before committing the simplification.
    for (SDNode *U : Cond->uses()) {
      if (U->getOpcode() == X86ISD::BLENDV)
        continue;

      SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
                               Cond, U->getOperand(1), U->getOperand(2));
      DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
      DCI.AddToWorklist(U);
    }
    DCI.CommitTargetLoweringOpt(TLO);
    // N was rewritten in place; signal "handled".
    return SDValue(N, 0);
  }

  // Cond has other (non-select) uses: see if an existing simpler value
  // already provides the demanded sign bits, and blend on that instead.
  if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedBits, DAG))
    return DAG.getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), V,
                       N->getOperand(1), N->getOperand(2));

  return SDValue();
}
42372 | |
42373 | |
42374 | |
42375 | |
42376 | |
42377 | |
42378 | |
42379 | |
42380 | |
42381 | |
42382 | |
42383 | |
42384 | |
42385 | |
42386 | |
42387 | |
42388 | |
42389 | static SDValue combineLogicBlendIntoConditionalNegate( |
42390 | EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL, |
42391 | SelectionDAG &DAG, const X86Subtarget &Subtarget) { |
42392 | EVT MaskVT = Mask.getValueType(); |
42393 | assert(MaskVT.isInteger() && |
42394 | DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() && |
42395 | "Mask must be zero/all-bits"); |
42396 | |
42397 | if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT) |
42398 | return SDValue(); |
42399 | if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) |
42400 | return SDValue(); |
42401 | |
42402 | auto IsNegV = [](SDNode *N, SDValue V) { |
42403 | return N->getOpcode() == ISD::SUB && N->getOperand(1) == V && |
42404 | ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()); |
42405 | }; |
42406 | |
42407 | SDValue V; |
42408 | if (IsNegV(Y.getNode(), X)) |
42409 | V = X; |
42410 | else if (IsNegV(X.getNode(), Y)) |
42411 | V = Y; |
42412 | else |
42413 | return SDValue(); |
42414 | |
42415 | SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask); |
42416 | SDValue SubOp2 = Mask; |
42417 | |
42418 | |
42419 | |
42420 | |
42421 | |
42422 | |
42423 | |
42424 | |
42425 | |
42426 | |
42427 | |
42428 | if (V == Y) |
42429 | std::swap(SubOp1, SubOp2); |
42430 | |
42431 | SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2); |
42432 | return DAG.getBitcast(VT, Res); |
42433 | } |
42434 | |
42435 | |
42436 | static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, |
42437 | TargetLowering::DAGCombinerInfo &DCI, |
42438 | const X86Subtarget &Subtarget) { |
42439 | SDLoc DL(N); |
42440 | SDValue Cond = N->getOperand(0); |
42441 | SDValue LHS = N->getOperand(1); |
42442 | SDValue RHS = N->getOperand(2); |
42443 | |
42444 | |
42445 | |
42446 | if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS)) |
42447 | return V; |
42448 | |
42449 | EVT VT = LHS.getValueType(); |
42450 | EVT CondVT = Cond.getValueType(); |
42451 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
42452 | bool CondConstantVector = ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()); |
42453 | |
42454 | |
42455 | |
42456 | |
42457 | if (CondVT.isVector() && CondVT.isInteger() && |
42458 | CondVT.getScalarSizeInBits() == VT.getScalarSizeInBits() && |
42459 | (!CondConstantVector || CondVT.getScalarType() == MVT::i8) && |
42460 | DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits()) |
42461 | if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS, |
42462 | DL, DAG, Subtarget)) |
42463 | return V; |
42464 | |
42465 | |
42466 | if (CondConstantVector && DCI.isBeforeLegalizeOps()) { |
42467 | SmallVector<int, 64> Mask; |
42468 | if (createShuffleMaskFromVSELECT(Mask, Cond)) |
42469 | return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask); |
42470 | } |
42471 | |
42472 | |
42473 | |
42474 | |
42475 | if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() && |
42476 | LHS.getOpcode() == X86ISD::PSHUFB && RHS.getOpcode() == X86ISD::PSHUFB && |
42477 | LHS.hasOneUse() && RHS.hasOneUse()) { |
42478 | MVT SimpleVT = VT.getSimpleVT(); |
42479 | SmallVector<SDValue, 1> LHSOps, RHSOps; |
42480 | SmallVector<int, 64> LHSMask, RHSMask, CondMask; |
42481 | if (createShuffleMaskFromVSELECT(CondMask, Cond) && |
42482 | getTargetShuffleMask(LHS.getNode(), SimpleVT, true, LHSOps, LHSMask) && |
42483 | getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask)) { |
42484 | int NumElts = VT.getVectorNumElements(); |
42485 | for (int i = 0; i != NumElts; ++i) { |
42486 | if (CondMask[i] < NumElts) |
42487 | RHSMask[i] = 0x80; |
42488 | else |
42489 | LHSMask[i] = 0x80; |
42490 | } |
42491 | LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0), |
42492 | getConstVector(LHSMask, SimpleVT, DAG, DL, true)); |
42493 | RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0), |
42494 | getConstVector(RHSMask, SimpleVT, DAG, DL, true)); |
42495 | return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); |
42496 | } |
42497 | } |
42498 | |
42499 | |
42500 | |
42501 | |
42502 | |
42503 | |
42504 | if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && |
42505 | VT != MVT::f80 && VT != MVT::f128 && |
42506 | (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && |
42507 | (Subtarget.hasSSE2() || |
42508 | (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) { |
42509 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
42510 | |
42511 | unsigned Opcode = 0; |
42512 | |
42513 | if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && |
42514 | DAG.isEqualTo(RHS, Cond.getOperand(1))) { |
42515 | switch (CC) { |
42516 | default: break; |
42517 | case ISD::SETULT: |
42518 | |
42519 | |
42520 | |
42521 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { |
42522 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
42523 | !(DAG.isKnownNeverZeroFloat(LHS) || |
42524 | DAG.isKnownNeverZeroFloat(RHS))) |
42525 | break; |
42526 | std::swap(LHS, RHS); |
42527 | } |
42528 | Opcode = X86ISD::FMIN; |
42529 | break; |
42530 | case ISD::SETOLE: |
42531 | |
42532 | |
42533 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
42534 | !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS)) |
42535 | break; |
42536 | Opcode = X86ISD::FMIN; |
42537 | break; |
42538 | case ISD::SETULE: |
42539 | |
42540 | |
42541 | std::swap(LHS, RHS); |
42542 | LLVM_FALLTHROUGH; |
42543 | case ISD::SETOLT: |
42544 | case ISD::SETLT: |
42545 | case ISD::SETLE: |
42546 | Opcode = X86ISD::FMIN; |
42547 | break; |
42548 | |
42549 | case ISD::SETOGE: |
42550 | |
42551 | |
42552 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
42553 | !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS)) |
42554 | break; |
42555 | Opcode = X86ISD::FMAX; |
42556 | break; |
42557 | case ISD::SETUGT: |
42558 | |
42559 | |
42560 | |
42561 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { |
42562 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
42563 | !(DAG.isKnownNeverZeroFloat(LHS) || |
42564 | DAG.isKnownNeverZeroFloat(RHS))) |
42565 | break; |
42566 | std::swap(LHS, RHS); |
42567 | } |
42568 | Opcode = X86ISD::FMAX; |
42569 | break; |
42570 | case ISD::SETUGE: |
42571 | |
42572 | |
42573 | std::swap(LHS, RHS); |
42574 | LLVM_FALLTHROUGH; |
42575 | case ISD::SETOGT: |
42576 | case ISD::SETGT: |
42577 | case ISD::SETGE: |
42578 | Opcode = X86ISD::FMAX; |
42579 | break; |
42580 | } |
42581 | |
42582 | } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && |
42583 | DAG.isEqualTo(RHS, Cond.getOperand(0))) { |
42584 | switch (CC) { |
42585 | default: break; |
42586 | case ISD::SETOGE: |
42587 | |
42588 | |
42589 | |
42590 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
42591 | !(DAG.isKnownNeverZeroFloat(LHS) || |
42592 | DAG.isKnownNeverZeroFloat(RHS))) { |
42593 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
42594 | break; |
42595 | std::swap(LHS, RHS); |
42596 | } |
42597 | Opcode = X86ISD::FMIN; |
42598 | break; |
42599 | case ISD::SETUGT: |
42600 | |
42601 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
42602 | break; |
42603 | Opcode = X86ISD::FMIN; |
42604 | break; |
42605 | case ISD::SETUGE: |
42606 | |
42607 | |
42608 | std::swap(LHS, RHS); |
42609 | LLVM_FALLTHROUGH; |
42610 | case ISD::SETOGT: |
42611 | case ISD::SETGT: |
42612 | case ISD::SETGE: |
42613 | Opcode = X86ISD::FMIN; |
42614 | break; |
42615 | |
42616 | case ISD::SETULT: |
42617 | |
42618 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
42619 | break; |
42620 | Opcode = X86ISD::FMAX; |
42621 | break; |
42622 | case ISD::SETOLE: |
42623 | |
42624 | |
42625 | |
42626 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
42627 | !DAG.isKnownNeverZeroFloat(LHS) && |
42628 | !DAG.isKnownNeverZeroFloat(RHS)) { |
42629 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
42630 | break; |
42631 | std::swap(LHS, RHS); |
42632 | } |
42633 | Opcode = X86ISD::FMAX; |
42634 | break; |
42635 | case ISD::SETULE: |
42636 | |
42637 | |
42638 | std::swap(LHS, RHS); |
42639 | LLVM_FALLTHROUGH; |
42640 | case ISD::SETOLT: |
42641 | case ISD::SETLT: |
42642 | case ISD::SETLE: |
42643 | Opcode = X86ISD::FMAX; |
42644 | break; |
42645 | } |
42646 | } |
42647 | |
42648 | if (Opcode) |
42649 | return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); |
42650 | } |
42651 | |
42652 | |
42653 | |
42654 | |
42655 | |
42656 | |
42657 | if (Subtarget.hasAVX512() && N->getOpcode() == ISD::SELECT && |
42658 | Cond.getOpcode() == ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) { |
42659 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
42660 | SDValue AndNode = Cond.getOperand(0); |
42661 | if (AndNode.getOpcode() == ISD::AND && CC == ISD::SETEQ && |
42662 | isNullConstant(Cond.getOperand(1)) && |
42663 | isOneConstant(AndNode.getOperand(1))) { |
42664 | |
42665 | |
42666 | AndNode = DAG.getZExtOrTrunc(AndNode, DL, MVT::i8); |
42667 | return DAG.getNode(ISD::SELECT, DL, VT, AndNode, RHS, LHS); |
42668 | } |
42669 | } |
42670 | |
42671 | |
42672 | |
42673 | |
42674 | |
42675 | |
42676 | |
42677 | |
42678 | if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() && |
42679 | CondVT.getVectorElementType() == MVT::i1 && |
42680 | (VT.getVectorElementType() == MVT::i8 || |
42681 | VT.getVectorElementType() == MVT::i16)) { |
42682 | Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); |
42683 | return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS); |
42684 | } |
42685 | |
42686 | |
42687 | |
42688 | |
42689 | |
42690 | if (Subtarget.hasAVX512() && CondVT.isVector() && |
42691 | CondVT.getVectorElementType() == MVT::i1) { |
42692 | auto SelectableOp = [&TLI](SDValue Op) { |
42693 | return Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
42694 | isTargetShuffle(Op.getOperand(0).getOpcode()) && |
42695 | isNullConstant(Op.getOperand(1)) && |
42696 | TLI.isTypeLegal(Op.getOperand(0).getValueType()) && |
42697 | Op.hasOneUse() && Op.getOperand(0).hasOneUse(); |
42698 | }; |
42699 | |
42700 | bool SelectableLHS = SelectableOp(LHS); |
42701 | bool SelectableRHS = SelectableOp(RHS); |
42702 | bool ZeroLHS = ISD::isBuildVectorAllZeros(LHS.getNode()); |
42703 | bool ZeroRHS = ISD::isBuildVectorAllZeros(RHS.getNode()); |
42704 | |
42705 | if ((SelectableLHS && ZeroRHS) || (SelectableRHS && ZeroLHS)) { |
42706 | EVT SrcVT = SelectableLHS ? LHS.getOperand(0).getValueType() |
42707 | : RHS.getOperand(0).getValueType(); |
42708 | EVT SrcCondVT = SrcVT.changeVectorElementType(MVT::i1); |
42709 | LHS = insertSubVector(DAG.getUNDEF(SrcVT), LHS, 0, DAG, DL, |
42710 | VT.getSizeInBits()); |
42711 | RHS = insertSubVector(DAG.getUNDEF(SrcVT), RHS, 0, DAG, DL, |
42712 | VT.getSizeInBits()); |
42713 | Cond = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcCondVT, |
42714 | DAG.getUNDEF(SrcCondVT), Cond, |
42715 | DAG.getIntPtrConstant(0, DL)); |
42716 | SDValue Res = DAG.getSelect(DL, SrcVT, Cond, LHS, RHS); |
42717 | return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits()); |
42718 | } |
42719 | } |
42720 | |
42721 | if (SDValue V = combineSelectOfTwoConstants(N, DAG)) |
42722 | return V; |
42723 | |
42724 | if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && |
42725 | Cond.hasOneUse()) { |
42726 | EVT CondVT = Cond.getValueType(); |
42727 | SDValue Cond0 = Cond.getOperand(0); |
42728 | SDValue Cond1 = Cond.getOperand(1); |
42729 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
42730 | |
42731 | |
42732 | |
42733 | |
42734 | |
42735 | |
42736 | |
42737 | |
42738 | |
42739 | |
42740 | |
42741 | |
42742 | |
42743 | |
42744 | |
42745 | |
42746 | |
42747 | |
42748 | |
42749 | |
42750 | if (LHS == Cond0 && RHS == Cond1) { |
42751 | if ((CC == ISD::SETGT && (isNullConstant(RHS) || isOneConstant(RHS))) || |
42752 | (CC == ISD::SETLT && isAllOnesConstant(RHS))) { |
42753 | ISD::CondCode NewCC = CC == ISD::SETGT ? ISD::SETGE : ISD::SETLE; |
42754 | Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC); |
42755 | return DAG.getSelect(DL, VT, Cond, LHS, RHS); |
42756 | } |
42757 | if (CC == ISD::SETUGT && isOneConstant(RHS)) { |
42758 | ISD::CondCode NewCC = ISD::SETUGE; |
42759 | Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC); |
42760 | return DAG.getSelect(DL, VT, Cond, LHS, RHS); |
42761 | } |
42762 | } |
42763 | |
42764 | |
42765 | |
42766 | |
42767 | |
42768 | |
42769 | |
42770 | |
42771 | if (RHS.getOpcode() == ISD::SELECT && RHS.getOperand(1) == LHS && |
42772 | RHS.getOperand(0).getOpcode() == ISD::SETCC) { |
42773 | SDValue InnerSetCC = RHS.getOperand(0); |
42774 | ISD::CondCode InnerCC = |
42775 | cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get(); |
42776 | if ((CC == ISD::SETEQ || InnerCC == ISD::SETEQ) && |
42777 | Cond0 == InnerSetCC.getOperand(0) && |
42778 | Cond1 == InnerSetCC.getOperand(1)) { |
42779 | ISD::CondCode NewCC; |
42780 | switch (CC == ISD::SETEQ ? InnerCC : CC) { |
42781 | case ISD::SETGT: NewCC = ISD::SETGE; break; |
42782 | case ISD::SETLT: NewCC = ISD::SETLE; break; |
42783 | case ISD::SETUGT: NewCC = ISD::SETUGE; break; |
42784 | case ISD::SETULT: NewCC = ISD::SETULE; break; |
42785 | default: NewCC = ISD::SETCC_INVALID; break; |
42786 | } |
42787 | if (NewCC != ISD::SETCC_INVALID) { |
42788 | Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC); |
42789 | return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2)); |
42790 | } |
42791 | } |
42792 | } |
42793 | } |
42794 | |
42795 | |
42796 | |
42797 | |
42798 | if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() && |
42799 | Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 && |
42800 | ISD::isBuildVectorAllZeros(LHS.getNode()) && |
42801 | !ISD::isBuildVectorAllZeros(RHS.getNode())) { |
42802 | |
42803 | SDValue CondNew = DAG.getNOT(DL, Cond, CondVT); |
42804 | |
42805 | return DAG.getSelect(DL, VT, CondNew, RHS, LHS); |
42806 | } |
42807 | |
42808 | |
42809 | if (!TLI.isTypeLegal(VT)) |
42810 | return SDValue(); |
42811 | |
42812 | if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) |
42813 | return V; |
42814 | |
42815 | if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget)) |
42816 | return V; |
42817 | |
42818 | if (SDValue V = narrowVectorSelect(N, DAG, Subtarget)) |
42819 | return V; |
42820 | |
42821 | |
42822 | if (CondVT.getScalarType() != MVT::i1) { |
42823 | if (SDValue CondNot = IsNOT(Cond, DAG)) |
42824 | return DAG.getNode(N->getOpcode(), DL, VT, |
42825 | DAG.getBitcast(CondVT, CondNot), RHS, LHS); |
42826 | |
42827 | if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() && |
42828 | ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) { |
42829 | Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT, |
42830 | DAG.getConstant(0, DL, CondVT), Cond.getOperand(0)); |
42831 | return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS); |
42832 | } |
42833 | } |
42834 | |
42835 | |
42836 | |
42837 | |
42838 | |
42839 | |
42840 | if (N->getOpcode() == ISD::SELECT && VT.isVector() && |
42841 | VT.getVectorElementType() == MVT::i1 && |
42842 | (DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) { |
42843 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); |
42844 | bool LHSIsConst = ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()); |
42845 | bool RHSIsConst = ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()); |
42846 | |
42847 | if ((LHSIsConst || |
42848 | (LHS.getOpcode() == ISD::BITCAST && |
42849 | LHS.getOperand(0).getValueType() == IntVT)) && |
42850 | (RHSIsConst || |
42851 | (RHS.getOpcode() == ISD::BITCAST && |
42852 | RHS.getOperand(0).getValueType() == IntVT))) { |
42853 | if (LHSIsConst) |
42854 | LHS = combinevXi1ConstantToInteger(LHS, DAG); |
42855 | else |
42856 | LHS = LHS.getOperand(0); |
42857 | |
42858 | if (RHSIsConst) |
42859 | RHS = combinevXi1ConstantToInteger(RHS, DAG); |
42860 | else |
42861 | RHS = RHS.getOperand(0); |
42862 | |
42863 | SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS); |
42864 | return DAG.getBitcast(VT, Select); |
42865 | } |
42866 | } |
42867 | |
42868 | |
42869 | |
42870 | |
42871 | if (DCI.isBeforeLegalize() && !Subtarget.hasAVX512() && |
42872 | N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && |
42873 | Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1 && |
42874 | Cond.getOperand(0).getOpcode() == ISD::AND && |
42875 | isNullOrNullSplat(Cond.getOperand(1)) && |
42876 | cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ && |
42877 | Cond.getOperand(0).getValueType() == VT) { |
42878 | |
42879 | SDValue And = Cond.getOperand(0); |
42880 | auto *C = isConstOrConstSplat(And.getOperand(1)); |
42881 | if (C && C->getAPIntValue().isPowerOf2()) { |
42882 | |
42883 | SDValue NotCond = |
42884 | DAG.getSetCC(DL, CondVT, And, Cond.getOperand(1), ISD::SETNE); |
42885 | return DAG.getSelect(DL, VT, NotCond, RHS, LHS); |
42886 | } |
42887 | |
42888 | |
42889 | |
42890 | |
42891 | unsigned EltBitWidth = VT.getScalarSizeInBits(); |
42892 | bool CanShiftBlend = |
42893 | TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) || |
42894 | (Subtarget.hasAVX2() && EltBitWidth == 64) || |
42895 | (Subtarget.hasXOP())); |
42896 | if (CanShiftBlend && |
42897 | ISD::matchUnaryPredicate(And.getOperand(1), [](ConstantSDNode *C) { |
42898 | return C->getAPIntValue().isPowerOf2(); |
42899 | })) { |
42900 | |
42901 | SDValue Mask = And.getOperand(1); |
42902 | SmallVector<int, 32> ShlVals; |
42903 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { |
42904 | auto *MaskVal = cast<ConstantSDNode>(Mask.getOperand(i)); |
42905 | ShlVals.push_back(EltBitWidth - 1 - |
42906 | MaskVal->getAPIntValue().exactLogBase2()); |
42907 | } |
42908 | |
42909 | SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL); |
42910 | SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt); |
42911 | SDValue NewCond = |
42912 | DAG.getSetCC(DL, CondVT, Shl, Cond.getOperand(1), ISD::SETLT); |
42913 | return DAG.getSelect(DL, VT, NewCond, RHS, LHS); |
42914 | } |
42915 | } |
42916 | |
42917 | return SDValue(); |
42918 | } |
42919 | |
42920 | |
42921 | |
42922 | |
42923 | |
42924 | |
42925 | |
/// Combine:
///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
/// to:
///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
/// i.e., reuse the EFLAGS produced by the LOCK-prefixed arithmetic directly
/// instead of materializing the atomic result and comparing it. \p CC is
/// rewritten in place to the condition that matches the new flags.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                       SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
  // This combine only operates on CMP-like nodes: X86ISD::CMP, or an
  // X86ISD::SUB whose arithmetic result (value 0) is unused.
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // Can't replace the flags if the CMP has other users: we rewrite CC for
  // this one consumer, so every other consumer would see the wrong
  // condition against the new flags.
  if (!Cmp.hasOneUse())
    return SDValue();

  // This only applies to variations of the common case:
  //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
  //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
  //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
  //   (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
  // Using the proper condcodes (see below), overflow is checked for.
  //
  // FIXME: We can generalize both constraints:
  // - XOR/OR/AND (if they were made to survive AtomicExpand)
  // - LHS != 1
  // if the result is compared.

  SDValue CmpLHS = Cmp.getOperand(0);
  SDValue CmpRHS = Cmp.getOperand(1);
  EVT CmpVT = CmpLHS.getValueType();

  // The atomic op must feed only this compare, otherwise its integer result
  // is still needed and the op cannot be replaced by a flag-only form.
  if (!CmpLHS.hasOneUse())
    return SDValue();

  unsigned Opc = CmpLHS.getOpcode();
  if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
    return SDValue();

  // Operand 2 of an atomic RMW node is the value operand.
  SDValue OpRHS = CmpLHS.getOperand(2);
  auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
  if (!OpRHSC)
    return SDValue();

  // Normalize to an addition: (atomic_load_sub x, C) == (atomic_load_add x, -C).
  APInt Addend = OpRHSC->getAPIntValue();
  if (Opc == ISD::ATOMIC_LOAD_SUB)
    Addend = -Addend;

  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
  if (!CmpRHSC)
    return SDValue();

  APInt Comparison = CmpRHSC->getAPIntValue();
  APInt NegAddend = -Addend;

  // If the comparison value is off-by-one from -Addend, try to nudge it into
  // place by weakening/strengthening the condition code (guarding against
  // wrap-around at the extremes).
  if (Comparison != NegAddend) {
    APInt IncComparison = Comparison + 1;
    if (IncComparison == NegAddend) {
      if (CC == X86::COND_A && !Comparison.isMaxValue()) {
        // (x >u C)  ==  (x >=u C+1) when C != UINT_MAX.
        Comparison = IncComparison;
        CC = X86::COND_AE;
      } else if (CC == X86::COND_LE && !Comparison.isMaxSignedValue()) {
        // (x <=s C)  ==  (x <s C+1) when C != INT_MAX.
        Comparison = IncComparison;
        CC = X86::COND_L;
      }
    }
    APInt DecComparison = Comparison - 1;
    if (DecComparison == NegAddend) {
      if (CC == X86::COND_AE && !Comparison.isMinValue()) {
        // (x >=u C)  ==  (x >u C-1) when C != 0.
        Comparison = DecComparison;
        CC = X86::COND_A;
      } else if (CC == X86::COND_L && !Comparison.isMinSignedValue()) {
        // (x <s C)  ==  (x <=s C-1) when C != INT_MIN.
        Comparison = DecComparison;
        CC = X86::COND_LE;
      }
    }
  }

  // If the addend is the negation of the comparison value, then we can do
  // a full comparison by emitting the atomic arithmetic as a locked sub.
  if (Comparison == NegAddend) {
    // The CMP is (x + Addend) vs -Addend, i.e. the flags of the locked
    // subtraction (x - (-Addend)) are exactly the flags being tested.
    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
    auto AtomicSub = DAG.getAtomic(
        ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpVT,
        CmpLHS.getOperand(0), CmpLHS.getOperand(1),
        DAG.getConstant(NegAddend, SDLoc(CmpRHS), CmpVT),
        AN->getMemOperand());
    auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
    // The integer result of the atomic op is dead (hasOneUse checked above);
    // keep the chain by wiring users of value(1) to the LOCK'ed op's chain.
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
    return LockOp;
  }

  // The remaining condition-code tricks below are only valid for
  // comparisons against zero.
  if (!Comparison.isNullValue())
    return SDValue();

  // Adjust the condition code so the flags of (x +/- 1) itself answer the
  // original signed comparison of the result against zero.
  if (CC == X86::COND_S && Addend == 1)
    CC = X86::COND_LE;
  else if (CC == X86::COND_NS && Addend == 1)
    CC = X86::COND_G;
  else if (CC == X86::COND_G && Addend == -1)
    CC = X86::COND_GE;
  else if (CC == X86::COND_LE && Addend == -1)
    CC = X86::COND_L;
  else
    return SDValue();

  // Reuse the flags of the LOCK'ed arithmetic directly.
  SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
  return LockOp;
}
43042 | |
43043 | |
43044 | |
43045 | |
43046 | |
43047 | |
43048 | |
43049 | |
43050 | |
43051 | |
43052 | |
43053 | |
43054 | |
43055 | |
43056 | |
43057 | |
/// Check whether a boolean test (a CMP of a 0/1 value against the constant
/// 0 or 1, consumed as E/NE) is testing a boolean value generated by
/// X86ISD::SETCC / SETCC_CARRY / a 0-1 CMOV. If so, return the EFLAGS
/// operand feeding that producer and rewrite \p CC to the condition that
/// makes the test equivalent, so the materialized boolean can be bypassed.
///
/// Simplify:
///   from: setcc  (cmp (setcc cond, EFLAGS), 0/1), E/NE
///   to:   cond (or its inverse, depending on the E/NE and 0/1 pairing)
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  // Only handle flag-only compares: CMP, or SUB whose value result is dead.
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // Quit if not used as a boolean value (only E/NE make sense here).
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  // Check CMP operands: one must be the constant 0 or 1, the other the
  // boolean-producing chain.
  SDValue Op1 = Cmp.getOperand(0);
  SDValue Op2 = Cmp.getOperand(1);

  SDValue SetCC;
  const ConstantSDNode* C = nullptr;
  bool needOppositeCond = (CC == X86::COND_E); // (x != 0) tests cond directly.
  bool checkAgainstTrue = false; // Is it checking against the "true" value 1?

  if ((C = dyn_cast<ConstantSDNode>(Op1)))
    SetCC = Op2;
  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
    SetCC = Op1;
  else // Quit if neither operand is a constant.
    return SDValue();

  if (C->getZExtValue() == 1) {
    // Comparing against 1 flips the sense relative to comparing against 0.
    needOppositeCond = !needOppositeCond;
    checkAgainstTrue = true;
  } else if (C->getZExtValue() != 0)
    // Quit if the constant is neither 0 nor 1.
    return SDValue();

  bool truncatedToBoolWithAnd = false;
  // Skip (zext $x), (trunc $x), or (and $x, 1) nodes: all preserve the
  // low bit, which is the boolean being tested.
  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
         SetCC.getOpcode() == ISD::TRUNCATE ||
         SetCC.getOpcode() == ISD::AND) {
    if (SetCC.getOpcode() == ISD::AND) {
      int OpIdx = -1;
      if (isOneConstant(SetCC.getOperand(0)))
        OpIdx = 1;
      if (isOneConstant(SetCC.getOperand(1)))
        OpIdx = 0;
      if (OpIdx < 0)
        break; // Not an (and x, 1); stop peeling.
      SetCC = SetCC.getOperand(OpIdx);
      truncatedToBoolWithAnd = true;
    } else
      SetCC = SetCC.getOperand(0);
  }

  switch (SetCC.getOpcode()) {
  case X86ISD::SETCC_CARRY:
    // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, its result
    // is all-ones when true rather than 1. A compare against 1 is only
    // meaningful if the value was reduced to its low bit with an explicit
    // (and ..., 1); zext/trunc alone do not guarantee that.
    if (checkAgainstTrue && !truncatedToBoolWithAnd)
      break;
    assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
           "Invalid use of SETCC_CARRY!");
    LLVM_FALLTHROUGH;
  case X86ISD::SETCC:
    // Use the producer's condition code, or its opposite if necessary, and
    // hand back the EFLAGS it was testing.
    CC = X86::CondCode(SetCC.getConstantOperandVal(0));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(1);
  case X86ISD::CMOV: {
    // Check whether the false/true operands are the canonical booleans 0/1.
    ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
    ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
    // Quit if the true value is not a constant.
    if (!TVal)
      return SDValue();
    if (!FVal) {
      SDValue Op = SetCC.getOperand(0);
      // Skip a 'zext' or 'trunc' node.
      if (Op.getOpcode() == ISD::ZERO_EXTEND ||
          Op.getOpcode() == ISD::TRUNCATE)
        Op = Op.getOperand(0);
      // A special case for rdrand/rdseed, where 0 is set if the random value
      // generation failed — the non-constant operand is still boolean-like.
      if ((Op.getOpcode() != X86ISD::RDRAND &&
           Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
        return SDValue();
    }

    bool FValIsFalse = true;
    if (FVal && FVal->getZExtValue() != 0) {
      // Quit if the false value is not 0 or 1.
      if (FVal->getZExtValue() != 1)
        return SDValue();
      // If FVal is 1, the CMOV encodes the opposite boolean.
      needOppositeCond = !needOppositeCond;
      FValIsFalse = false;
    }
    // The true value must be the complement of the false value (0/1 or 1/0).
    if (FValIsFalse && TVal->getZExtValue() != 1)
      return SDValue();
    if (!FValIsFalse && TVal->getZExtValue() != 0)
      return SDValue();
    CC = X86::CondCode(SetCC.getConstantOperandVal(2));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(3); // The CMOV's EFLAGS operand.
  }
  }

  return SDValue();
}
43171 | |
43172 | |
43173 | |
43174 | |
43175 | |
43176 | static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, |
43177 | X86::CondCode &CC1, SDValue &Flags, |
43178 | bool &isAnd) { |
43179 | if (Cond->getOpcode() == X86ISD::CMP) { |
43180 | if (!isNullConstant(Cond->getOperand(1))) |
43181 | return false; |
43182 | |
43183 | Cond = Cond->getOperand(0); |
43184 | } |
43185 | |
43186 | isAnd = false; |
43187 | |
43188 | SDValue SetCC0, SetCC1; |
43189 | switch (Cond->getOpcode()) { |
43190 | default: return false; |
43191 | case ISD::AND: |
43192 | case X86ISD::AND: |
43193 | isAnd = true; |
43194 | LLVM_FALLTHROUGH; |
43195 | case ISD::OR: |
43196 | case X86ISD::OR: |
43197 | SetCC0 = Cond->getOperand(0); |
43198 | SetCC1 = Cond->getOperand(1); |
43199 | break; |
43200 | }; |
43201 | |
43202 | |
43203 | if (SetCC0.getOpcode() != X86ISD::SETCC || |
43204 | SetCC1.getOpcode() != X86ISD::SETCC || |
43205 | SetCC0->getOperand(1) != SetCC1->getOperand(1)) |
43206 | return false; |
43207 | |
43208 | CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0); |
43209 | CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0); |
43210 | Flags = SetCC0->getOperand(1); |
43211 | return true; |
43212 | } |
43213 | |
43214 | |
43215 | |
43216 | |
// When legalizing carry, we create carries via (add X, -1): the addition
// sets CF exactly when X is nonzero. If X ultimately comes from an actual
// carry (a SETCC/SETCC_CARRY of COND_B), use that carry's flags directly
// and drop the round-trip through a materialized boolean.
static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
  if (EFLAGS.getOpcode() == X86ISD::ADD) {
    if (isAllOnesConstant(EFLAGS.getOperand(1))) {
      SDValue Carry = EFLAGS.getOperand(0);
      // Peel off low-bit-preserving wrappers around the boolean.
      while (Carry.getOpcode() == ISD::TRUNCATE ||
             Carry.getOpcode() == ISD::ZERO_EXTEND ||
             Carry.getOpcode() == ISD::SIGN_EXTEND ||
             Carry.getOpcode() == ISD::ANY_EXTEND ||
             (Carry.getOpcode() == ISD::AND &&
              isOneConstant(Carry.getOperand(1))))
        Carry = Carry.getOperand(0);
      if (Carry.getOpcode() == X86ISD::SETCC ||
          Carry.getOpcode() == X86ISD::SETCC_CARRY) {
        uint64_t CarryCC = Carry.getConstantOperandVal(0);
        SDValue CarryOp1 = Carry.getOperand(1);
        // COND_B already is "CF set" — the producer's flags are the carry.
        if (CarryCC == X86::COND_B)
          return CarryOp1;
        if (CarryCC == X86::COND_A) {
          // Try to convert COND_A into COND_B in an attempt to facilitate
          // materializing "setb reg": ugt(x, y) == ult(y, x), so commuting
          // the SUB gives flags whose CF encodes the same predicate.
          //
          // Only do this when the SUB's value is otherwise unused, its type
          // is integer, and its second operand isn't a constant (the CMP
          // instruction cannot take an immediate as its first operand).
          if (CarryOp1.getOpcode() == X86ISD::SUB &&
              CarryOp1.getNode()->hasOneUse() &&
              CarryOp1.getValueType().isInteger() &&
              !isa<ConstantSDNode>(CarryOp1.getOperand(1))) {
            SDValue SubCommute =
                DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(),
                            CarryOp1.getOperand(1), CarryOp1.getOperand(0));
            return SDValue(SubCommute.getNode(), CarryOp1.getResNo());
          }
        }

        // If this is a check of the ZF of an (add x, 1), switch to CF:
        // the add of 1 produces zero exactly when it wraps, i.e. carries.
        if (CarryCC == X86::COND_E &&
            CarryOp1.getOpcode() == X86ISD::ADD &&
            isOneConstant(CarryOp1.getOperand(1)))
          return CarryOp1;
      }
    }
  }

  return SDValue();
}
43264 | |
43265 | |
43266 | |
/// If we are inverting a PTEST/TESTP operand, attempt to adjust the condition
/// code instead, to avoid materializing the inversion.
static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
                              SelectionDAG &DAG,
                              const X86Subtarget &Subtarget) {
  // Only PTEST/TESTP flag producers are handled here.
  if (EFLAGS.getOpcode() != X86ISD::PTEST &&
      EFLAGS.getOpcode() != X86ISD::TESTP)
    return SDValue();

  // PTEST/TESTP set EFLAGS as:
  //   TESTZ:  ZF = (Op0 & Op1) == 0
  //   TESTC:  CF = (~Op0 & Op1) == 0
  //   TESTNZC: ZF == 0 && CF == 0
  // so NOT-ing Op0 swaps the roles of ZF and CF.
  EVT VT = EFLAGS.getValueType();
  SDValue Op0 = EFLAGS.getOperand(0);
  SDValue Op1 = EFLAGS.getOperand(1);
  EVT OpVT = Op0.getValueType();

  // TEST*(~X,Y) == TEST*(X,Y) with the condition code swapped ZF<->CF.
  if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
    X86::CondCode InvCC;
    switch (CC) {
    case X86::COND_B:
      // testc -> testz.
      InvCC = X86::COND_E;
      break;
    case X86::COND_AE:
      // !testc -> !testz.
      InvCC = X86::COND_NE;
      break;
    case X86::COND_E:
      // testz -> testc.
      InvCC = X86::COND_B;
      break;
    case X86::COND_NE:
      // !testz -> !testc.
      InvCC = X86::COND_AE;
      break;
    case X86::COND_A:
    case X86::COND_BE:
      // testnzc -> testnzc (no change).
      InvCC = CC;
      break;
    default:
      InvCC = X86::COND_INVALID;
      break;
    }

    if (InvCC != X86::COND_INVALID) {
      CC = InvCC;
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                         DAG.getBitcast(OpVT, NotOp0), Op1);
    }
  }

  if (CC == X86::COND_E || CC == X86::COND_NE) {
    // TESTZ(X,~Y) == TESTC(Y,X): (X & ~Y) == 0 iff (~Y is 0 wherever X is 1).
    if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
      CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                         DAG.getBitcast(OpVT, NotOp1), Op0);
    }

    if (Op0 == Op1) {
      SDValue BC = peekThroughBitcasts(Op0);
      EVT BCVT = BC.getValueType();
      assert(BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
             "Unexpected vector type");

      // TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y).
      if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
        return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                           DAG.getBitcast(OpVT, BC.getOperand(0)),
                           DAG.getBitcast(OpVT, BC.getOperand(1)));
      }

      // TESTZ(AND(~X,Y),AND(~X,Y)) == TESTC(X,Y).
      if (BC.getOpcode() == X86ISD::ANDNP || BC.getOpcode() == X86ISD::FANDN) {
        CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
        return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                           DAG.getBitcast(OpVT, BC.getOperand(0)),
                           DAG.getBitcast(OpVT, BC.getOperand(1)));
      }

      // If every element is an all-sign value (each element is 0 or ~0),
      // see if we can use MOVMSK to more efficiently extract the sign bits
      // and compare against zero.
      // TODO: Handle TESTC with comparison inversion.
      // TODO: Can we remove SimplifyMultipleUseDemandedBits and rely on
      //       MOVMSK combines to make sure it's never worse than PTEST?
      unsigned EltBits = BCVT.getScalarSizeInBits();
      if (DAG.ComputeNumSignBits(BC) == EltBits) {
        assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
        APInt SignMask = APInt::getSignMask(EltBits);
        const TargetLowering &TLI = DAG.getTargetLoweringInfo();
        if (SDValue Res =
                TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
          // For vXi16 cases there is no vXi16 MOVMSK; use PMOVMSKB on the
          // byte view and mask out the low (duplicate) sign bit of each pair.
          SDLoc DL(EFLAGS);
          if (EltBits == 16) {
            MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
            Res = DAG.getBitcast(MovmskVT, Res);
            Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
            Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
                              DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
          } else {
            Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
          }
          return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
                             DAG.getConstant(0, DL, MVT::i32));
        }
      }
    }

    // TESTZ(-1,X) == TESTZ(X,X).
    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);

    // TESTZ(X,-1) == TESTZ(X,X).
    if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
  }

  return SDValue();
}
43391 | |
43392 | |
// Attempt to simplify the E/NE comparison of a MOVMSK sign-bit reduction:
// "any_of" patterns (MOVMSK(X) == / != 0) and "all_of" patterns
// (MOVMSK(X) == / != all-elements mask).
static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
                                  SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  // Only handle equality comparisons of a MOVMSK against a constant.
  if (!(CC == X86::COND_E || CC == X86::COND_NE))
    return SDValue();
  if (EFLAGS.getValueType() != MVT::i32)
    return SDValue();
  unsigned CmpOpcode = EFLAGS.getOpcode();
  if (CmpOpcode != X86ISD::CMP && CmpOpcode != X86ISD::SUB)
    return SDValue();
  auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
  if (!CmpConstant)
    return SDValue();
  const APInt &CmpVal = CmpConstant->getAPIntValue();

  SDValue CmpOp = EFLAGS.getOperand(0);
  unsigned CmpBits = CmpOp.getValueSizeInBits();
  assert(CmpBits == CmpVal.getBitWidth() && "Value size mismatch");

  // Peek through any truncation of the MOVMSK result.
  if (CmpOp.getOpcode() == ISD::TRUNCATE)
    CmpOp = CmpOp.getOperand(0);

  // Bail if we don't find a MOVMSK.
  if (CmpOp.getOpcode() != X86ISD::MOVMSK)
    return SDValue();

  SDValue Vec = CmpOp.getOperand(0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((VecVT.is128BitVector() || VecVT.is256BitVector()) &&
         "Unexpected MOVMSK operand");
  unsigned NumElts = VecVT.getVectorNumElements();
  unsigned NumEltBits = VecVT.getScalarSizeInBits();

  // any_of: CMP against 0 (no sign bit set / at least one set).
  // all_of: SUB against the full element mask (every sign bit set), and the
  // truncation (if any) must not have dropped mask bits (NumElts <= CmpBits).
  bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isNullValue();
  bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits &&
                 CmpVal.isMask(NumElts);
  if (!IsAnyOf && !IsAllOf)
    return SDValue();

  // See if we can peek through to a vector with a wider element type: if the
  // sign bits extend down through all the narrower sub-elements, MOVMSK on
  // the wider type is equivalent and avoids the bitcast, exposing further
  // demanded-bits simplifications.
  if (Vec.getOpcode() == ISD::BITCAST) {
    SDValue BC = peekThroughBitcasts(Vec);
    MVT BCVT = BC.getSimpleValueType();
    unsigned BCNumElts = BCVT.getVectorNumElements();
    unsigned BCNumEltBits = BCVT.getScalarSizeInBits();
    if ((BCNumEltBits == 32 || BCNumEltBits == 64) &&
        BCNumEltBits > NumEltBits &&
        DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) {
      SDLoc DL(EFLAGS);
      // Rebuild the comparison constant for the smaller element count.
      unsigned CmpMask = IsAnyOf ? 0 : ((1 << BCNumElts) - 1);
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
                         DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BC),
                         DAG.getConstant(CmpMask, DL, MVT::i32));
    }
  }

  // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
  // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
  if (IsAllOf && Subtarget.hasSSE41()) {
    SDValue BC = peekThroughBitcasts(Vec);
    if (BC.getOpcode() == X86ISD::PCMPEQ &&
        ISD::isBuildVectorAllZeros(BC.getOperand(1).getNode())) {
      MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
      SDValue V = DAG.getBitcast(TestVT, BC.getOperand(0));
      return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
    }
  }

  // See if we can avoid a PACKSS: the v16i8 sign bits after PACKSS are the
  // sign bits of the original wider elements, which PMOVMSKB can read
  // directly from the byte view (keeping only every other bit).
  if (Vec.getOpcode() == X86ISD::PACKSS && VecVT == MVT::v16i8) {
    SDValue VecOp0 = Vec.getOperand(0);
    SDValue VecOp1 = Vec.getOperand(1);
    bool SignExt0 = DAG.ComputeNumSignBits(VecOp0) > 8;
    bool SignExt1 = DAG.ComputeNumSignBits(VecOp1) > 8;

    // PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)) & 0xAAAA.
    // The AND is unnecessary if the elements are already all-sign values.
    if (IsAnyOf && CmpBits == 8 && VecOp1.isUndef()) {
      SDLoc DL(EFLAGS);
      SDValue Result = DAG.getBitcast(MVT::v16i8, VecOp0);
      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
      Result = DAG.getZExtOrTrunc(Result, DL, MVT::i16);
      if (!SignExt0) {
        Result = DAG.getNode(ISD::AND, DL, MVT::i16, Result,
                             DAG.getConstant(0xAAAA, DL, MVT::i16));
      }
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i16));
    }

    // PMOVMSKB(PACKSSBW(LO(X), HI(X)))
    //   -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA.
    if (CmpBits >= 16 && Subtarget.hasInt256() &&
        VecOp0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        VecOp1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        VecOp0.getOperand(0) == VecOp1.getOperand(0) &&
        VecOp0.getConstantOperandAPInt(1) == 0 &&
        VecOp1.getConstantOperandAPInt(1) == 8 &&
        (IsAnyOf || (SignExt0 && SignExt1))) {
      SDLoc DL(EFLAGS);
      SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0));
      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
      unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
      if (!SignExt0 || !SignExt1) {
        assert(IsAnyOf && "Only perform v16i16 signmasks for any_of patterns");
        Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
                             DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
      }
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
                         DAG.getConstant(CmpMask, DL, MVT::i32));
    }
  }

  // MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced:
  // a unary shuffle that touches all source elements only permutes the mask
  // bits, which doesn't change any_of/all_of results.
  SmallVector<int, 32> ShuffleMask;
  SmallVector<SDValue, 2> ShuffleInputs;
  if (NumElts <= CmpBits &&
      getTargetShuffleInputs(peekThroughBitcasts(Vec), ShuffleInputs,
                             ShuffleMask, DAG) &&
      ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
      ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) {
    unsigned NumShuffleElts = ShuffleMask.size();
    APInt DemandedElts = APInt::getNullValue(NumShuffleElts);
    for (int M : ShuffleMask) {
      assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
      DemandedElts.setBit(M);
    }
    if (DemandedElts.isAllOnesValue()) {
      SDLoc DL(EFLAGS);
      SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
      Result =
          DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType());
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
                         EFLAGS.getOperand(1));
    }
  }

  return SDValue();
}
43540 | |
43541 | |
43542 | |
43543 | |
/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and
/// replacing uses of chain values. The sub-combiners are tried in order;
/// each may rewrite \p CC in place, so the ordering is significant.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
                                  SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  // Carry-through-ADD only makes sense when the consumer tests CF.
  if (CC == X86::COND_B)
    if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
      return Flags;

  // Bypass a materialized boolean (SETCC/CMOV) re-tested against 0/1.
  if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
    return R;

  // Simplify PTEST/TESTP flag producers (NOT folding, operand rewrites).
  if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG, Subtarget))
    return R;

  // Simplify MOVMSK any_of/all_of comparison patterns.
  if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
    return R;

  // Finally, try to reuse flags from LOCK-prefixed atomic arithmetic.
  return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
}
43562 | |
43563 | |
43564 | static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, |
43565 | TargetLowering::DAGCombinerInfo &DCI, |
43566 | const X86Subtarget &Subtarget) { |
43567 | SDLoc DL(N); |
43568 | |
43569 | SDValue FalseOp = N->getOperand(0); |
43570 | SDValue TrueOp = N->getOperand(1); |
43571 | X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); |
43572 | SDValue Cond = N->getOperand(3); |
43573 | |
43574 | |
43575 | if (TrueOp == FalseOp) |
43576 | return TrueOp; |
43577 | |
43578 | |
43579 | |
43580 | if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { |
43581 | if (!(FalseOp.getValueType() == MVT::f80 || |
43582 | (FalseOp.getValueType() == MVT::f64 && !Subtarget.hasSSE2()) || |
43583 | (FalseOp.getValueType() == MVT::f32 && !Subtarget.hasSSE1())) || |
43584 | !Subtarget.hasCMov() || hasFPCMov(CC)) { |
43585 | SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8), |
43586 | Flags}; |
43587 | return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
43588 | } |
43589 | } |
43590 | |
43591 | |
43592 | |
43593 | |
43594 | if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) { |
43595 | if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) { |
43596 | |
43597 | |
43598 | if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { |
43599 | CC = X86::GetOppositeBranchCondition(CC); |
43600 | std::swap(TrueC, FalseC); |
43601 | std::swap(TrueOp, FalseOp); |
43602 | } |
43603 | |
43604 | |
43605 | |
43606 | |
43607 | if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { |
43608 | Cond = getSETCC(CC, Cond, DL, DAG); |
43609 | |
43610 | |
43611 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); |
43612 | |
43613 | unsigned ShAmt = TrueC->getAPIntValue().logBase2(); |
43614 | Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, |
43615 | DAG.getConstant(ShAmt, DL, MVT::i8)); |
43616 | return Cond; |
43617 | } |
43618 | |
43619 | |
43620 | |
43621 | if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { |
43622 | Cond = getSETCC(CC, Cond, DL, DAG); |
43623 | |
43624 | |
43625 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, |
43626 | FalseC->getValueType(0), Cond); |
43627 | Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, |
43628 | SDValue(FalseC, 0)); |
43629 | return Cond; |
43630 | } |
43631 | |
43632 | |
43633 | |
43634 | if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { |
43635 | APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); |
43636 | assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() && |
43637 | "Implicit constant truncation"); |
43638 | |
43639 | bool isFastMultiplier = false; |
43640 | if (Diff.ult(10)) { |
43641 | switch (Diff.getZExtValue()) { |
43642 | default: break; |
43643 | case 1: |
43644 | case 2: |
43645 | case 3: |
43646 | case 4: |
43647 | case 5: |
43648 | case 8: |
43649 | case 9: |
43650 | isFastMultiplier = true; |
43651 | break; |
43652 | } |
43653 | } |
43654 | |
43655 | if (isFastMultiplier) { |
43656 | Cond = getSETCC(CC, Cond, DL ,DAG); |
43657 | |
43658 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), |
43659 | Cond); |
43660 | |
43661 | if (Diff != 1) |
43662 | Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, |
43663 | DAG.getConstant(Diff, DL, Cond.getValueType())); |
43664 | |
43665 | |
43666 | if (FalseC->getAPIntValue() != 0) |
43667 | Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, |
43668 | SDValue(FalseC, 0)); |
43669 | return Cond; |
43670 | } |
43671 | } |
43672 | } |
43673 | } |
43674 | |
43675 | |
43676 | |
43677 | |
43678 | |
43679 | |
43680 | |
43681 | |
43682 | |
43683 | |
43684 | |
43685 | |
43686 | |
43687 | |
43688 | |
43689 | if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) { |
43690 | |
43691 | |
43692 | |
43693 | ConstantSDNode *CmpAgainst = nullptr; |
43694 | if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && |
43695 | (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) && |
43696 | !isa<ConstantSDNode>(Cond.getOperand(0))) { |
43697 | |
43698 | if (CC == X86::COND_NE && |
43699 | CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) { |
43700 | CC = X86::GetOppositeBranchCondition(CC); |
43701 | std::swap(TrueOp, FalseOp); |
43702 | } |
43703 | |
43704 | if (CC == X86::COND_E && |
43705 | CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) { |
43706 | SDValue Ops[] = {FalseOp, Cond.getOperand(0), |
43707 | DAG.getTargetConstant(CC, DL, MVT::i8), Cond}; |
43708 | return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
43709 | } |
43710 | } |
43711 | } |
43712 | |
43713 | |
43714 | |
43715 | |
43716 | |
43717 | |
43718 | |
43719 | |
43720 | |
43721 | |
43722 | |
43723 | |
43724 | |
43725 | |
43726 | |
43727 | |
43728 | |
43729 | |
43730 | if (CC == X86::COND_NE) { |
43731 | SDValue Flags; |
43732 | X86::CondCode CC0, CC1; |
43733 | bool isAndSetCC; |
43734 | if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) { |
43735 | if (isAndSetCC) { |
43736 | std::swap(FalseOp, TrueOp); |
43737 | CC0 = X86::GetOppositeBranchCondition(CC0); |
43738 | CC1 = X86::GetOppositeBranchCondition(CC1); |
43739 | } |
43740 | |
43741 | SDValue LOps[] = {FalseOp, TrueOp, |
43742 | DAG.getTargetConstant(CC0, DL, MVT::i8), Flags}; |
43743 | SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); |
43744 | SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8), |
43745 | Flags}; |
43746 | SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
43747 | return CMOV; |
43748 | } |
43749 | } |
43750 | |
43751 | |
43752 | |
43753 | |
43754 | |
43755 | if ((CC == X86::COND_NE || CC == X86::COND_E) && |
43756 | Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) { |
43757 | SDValue Add = TrueOp; |
43758 | SDValue Const = FalseOp; |
43759 | |
43760 | if (CC == X86::COND_E) |
43761 | std::swap(Add, Const); |
43762 | |
43763 | |
43764 | |
43765 | if (Const == Cond.getOperand(0)) |
43766 | Const = Cond.getOperand(1); |
43767 | |
43768 | |
43769 | if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD && |
43770 | Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) && |
43771 | (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF || |
43772 | Add.getOperand(0).getOpcode() == ISD::CTTZ) && |
43773 | Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) { |
43774 | EVT VT = N->getValueType(0); |
43775 | |
43776 | SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); |
43777 | SDValue CMov = |
43778 | DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), |
43779 | DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), Cond); |
43780 | return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); |
43781 | } |
43782 | } |
43783 | |
43784 | return SDValue(); |
43785 | } |
43786 | |
43787 | |
// How a 32-bit-element vector multiply's operands are known to fit in a
// narrower type (see canReduceVMulWidth): MULS8/MULU8 mean the operands fit
// in 8 bits (signed/unsigned), so only the low 16-bit product half is needed;
// MULS16/MULU16 mean they fit in 16 bits, requiring both product halves.
enum class ShrinkMode { MULS8, MULU8, MULS16, MULU16 };
43789 | |
43790 | static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) { |
43791 | EVT VT = N->getOperand(0).getValueType(); |
43792 | if (VT.getScalarSizeInBits() != 32) |
43793 | return false; |
43794 | |
43795 | assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2"); |
43796 | unsigned SignBits[2] = {1, 1}; |
43797 | bool IsPositive[2] = {false, false}; |
43798 | for (unsigned i = 0; i < 2; i++) { |
43799 | SDValue Opd = N->getOperand(i); |
43800 | |
43801 | SignBits[i] = DAG.ComputeNumSignBits(Opd); |
43802 | IsPositive[i] = DAG.SignBitIsZero(Opd); |
43803 | } |
43804 | |
43805 | bool AllPositive = IsPositive[0] && IsPositive[1]; |
43806 | unsigned MinSignBits = std::min(SignBits[0], SignBits[1]); |
43807 | |
43808 | if (MinSignBits >= 25) |
43809 | Mode = ShrinkMode::MULS8; |
43810 | |
43811 | else if (AllPositive && MinSignBits >= 24) |
43812 | Mode = ShrinkMode::MULU8; |
43813 | |
43814 | else if (MinSignBits >= 17) |
43815 | Mode = ShrinkMode::MULS16; |
43816 | |
43817 | else if (AllPositive && MinSignBits >= 16) |
43818 | Mode = ShrinkMode::MULU16; |
43819 | else |
43820 | return false; |
43821 | return true; |
43822 | } |
43823 | |
43824 | |
43825 | |
43826 | |
43827 | |
43828 | |
43829 | |
43830 | |
43831 | |
43832 | |
43833 | |
43834 | |
43835 | |
43836 | |
43837 | |
43838 | |
43839 | |
43840 | |
43841 | |
43842 | |
43843 | |
43844 | |
43845 | |
43846 | |
43847 | |
43848 | |
43849 | |
43850 | |
43851 | |
/// When both operands of a v*i32 multiply are known to fit in 16 (or 8) bits,
/// rewrite the multiply on truncated i16 operands (PMULLW / PMULHW-style)
/// instead of a full 32-bit multiply, then rebuild the i32 result. Returns an
/// empty SDValue when the transform does not apply or is not profitable.
static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  // Requires SSE2 for the 16-bit multiply ops.
  if (!Subtarget.hasSSE2())
    return SDValue();

  // With SSE4.1 a plain 32-bit PMULLD is available; only bother splitting
  // into 16-bit ops when PMULLD is slow on this subtarget (or when optimizing
  // for size, where the single instruction wins).
  bool OptForMinSize = DAG.getMachineFunction().getFunction().hasMinSize();
  if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
    return SDValue();

  ShrinkMode Mode;
  if (!canReduceVMulWidth(N, DAG, Mode))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getOperand(0).getValueType();
  // The hi/lo interleave below pairs elements, so an even count is required.
  unsigned NumElts = VT.getVectorNumElements();
  if ((NumElts % 2) != 0)
    return SDValue();

  EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);

  // Narrow both operands to i16 elements.
  SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
  SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);

  // Low 16 bits of each 32-bit product.
  SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
  // 8-bit operands: the product fits entirely in the low half, so just
  // extend it (zero- or sign-, matching the operands' signedness).
  if (Mode == ShrinkMode::MULU8 || Mode == ShrinkMode::MULS8)
    return DAG.getNode((Mode == ShrinkMode::MULU8) ? ISD::ZERO_EXTEND
                                                   : ISD::SIGN_EXTEND,
                       DL, VT, MulLo);

  EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts / 2);

  // 16-bit operands: also need the high 16 bits of each product
  // (signed or unsigned high multiply per Mode).
  SDValue MulHi =
      DAG.getNode(Mode == ShrinkMode::MULS16 ? ISD::MULHS : ISD::MULHU, DL,
                  ReducedVT, NewN0, NewN1);

  // Interleave lo/hi halves: result element i is (MulLo[i], MulHi[i])
  // reassembled as one i32 via a shuffle + bitcast. First the lower half of
  // the elements...
  SmallVector<int, 16> ShuffleMask(NumElts);
  for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
    ShuffleMask[2 * i] = i;                // low 16 bits from MulLo
    ShuffleMask[2 * i + 1] = i + NumElts;  // high 16 bits from MulHi
  }
  SDValue ResLo =
      DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
  ResLo = DAG.getBitcast(ResVT, ResLo);

  // ...then the upper half, and concatenate back to the full width.
  for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
    ShuffleMask[2 * i] = i + NumElts / 2;
    ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
  }
  SDValue ResHi =
      DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
  ResHi = DAG.getBitcast(ResVT, ResHi);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
}
43921 | |
/// Decompose a multiply by selected "awkward" constants into LEA-friendly
/// sequences of mul-by-3/5/9 (X86ISD::MUL_IMM), shifts, and adds/subs.
/// Returns an empty SDValue when MulAmt has no special decomposition here.
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
                                 EVT VT, const SDLoc &DL) {
  // ((X * Mult) << Shift) +/- X
  auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                                 DAG.getConstant(Mult, DL, VT));
    Result = DAG.getNode(ISD::SHL, DL, VT, Result,
                         DAG.getConstant(Shift, DL, MVT::i8));
    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
                         N->getOperand(0));
    return Result;
  };

  // ((X * Mul1) * Mul2) +/- X
  auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) {
    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                                 DAG.getConstant(Mul1, DL, VT));
    Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result,
                         DAG.getConstant(Mul2, DL, VT));
    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
                         N->getOperand(0));
    return Result;
  };

  switch (MulAmt) {
  default:
    break;
  case 11:
    // 11 = ((X*5) << 1) + X
    return combineMulShlAddOrSub(5, 1, true);
  case 21:
    // 21 = ((X*5) << 2) + X
    return combineMulShlAddOrSub(5, 2, true);
  case 41:
    // 41 = ((X*5) << 3) + X
    return combineMulShlAddOrSub(5, 3, true);
  case 22:
    // 22 = X + ((X*5) << 2) + X
    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
                       combineMulShlAddOrSub(5, 2, true));
  case 19:
    // 19 = ((X*9) << 1) + X
    return combineMulShlAddOrSub(9, 1, true);
  case 37:
    // 37 = ((X*9) << 2) + X
    return combineMulShlAddOrSub(9, 2, true);
  case 73:
    // 73 = ((X*9) << 3) + X
    return combineMulShlAddOrSub(9, 3, true);
  case 13:
    // 13 = ((X*3) << 2) + X
    return combineMulShlAddOrSub(3, 2, true);
  case 23:
    // 23 = ((X*3) << 3) - X
    return combineMulShlAddOrSub(3, 3, false);
  case 26:
    // 26 = ((X*5) * 5) + X
    return combineMulMulAddOrSub(5, 5, true);
  case 28:
    // 28 = ((X*9) * 3) + X
    return combineMulMulAddOrSub(9, 3, true);
  case 29:
    // 29 = X + ((X*9) * 3) + X
    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
                       combineMulMulAddOrSub(9, 3, true));
  }

  // If MulAmt has exactly two set bits (clearing the lowest set bit leaves a
  // power of two), decompose into two shifts and an add:
  //   X * MulAmt = (X << high-bit) + (X << low-bit)
  // but only when the low set bit is in [1, 4) — see ScaleShift bound.
  if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
    unsigned ScaleShift = countTrailingZeros(MulAmt);
    if (ScaleShift >= 1 && ScaleShift < 4) {
      unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
      SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                                   DAG.getConstant(ShiftAmt, DL, MVT::i8));
      SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                                   DAG.getConstant(ScaleShift, DL, MVT::i8));
      return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
    }
  }

  return SDValue();
}
44007 | |
44008 | |
44009 | |
44010 | |
/// Try to turn a v*i32 multiply into X86ISD::VPMADDWD when both operands are
/// known to be sign-extended 16-bit values (>= 17 sign bits) and at least one
/// has its high 17 bits known zero, so the pmaddwd "add pairs" behavior
/// degenerates to a plain widening multiply.
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  // Not profitable on subtargets where pmaddwd is slow.
  if (Subtarget.isPMADDWDSlow())
    return SDValue();

  EVT VT = N->getValueType(0);

  // Only vectors of i32 elements.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32)
    return SDValue();

  // Require a legal result type (v2i32 is special-cased despite being
  // illegal — presumably handled by later widening; TODO confirm).
  if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());

  // 512-bit i16 vectors need AVX512BW.
  if (WVT == MVT::v32i16 && !Subtarget.hasBWI())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Without SSE4.1, skip when both operands are zero-extended from <= 8-bit
  // values (an alternative lowering is evidently preferred there).
  if (!Subtarget.hasSSE41() &&
      (N0.getOpcode() == ISD::ZERO_EXTEND &&
       N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
      (N1.getOpcode() == ISD::ZERO_EXTEND &&
       N1.getOperand(0).getScalarValueSizeInBits() <= 8))
    return SDValue();

  // Both operands must behave as sign-extended i16 (17 of 32 sign bits).
  if (DAG.ComputeNumSignBits(N1) < 17 || DAG.ComputeNumSignBits(N0) < 17)
    return SDValue();

  // At least one operand must have its top 17 bits zero so the per-pair add
  // inside pmaddwd contributes nothing.
  APInt Mask17 = APInt::getHighBitsSet(32, 17);
  if (!DAG.MaskedValueIsZero(N1, Mask17) && !DAG.MaskedValueIsZero(N0, Mask17))
    return SDValue();

  // Build VPMADDWD on 128/256-bit chunks via SplitOpsAndApply.
  auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                           ArrayRef<SDValue> Ops) {
    MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
    return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
  };
  return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
                          { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) },
                          PMADDWDBuilder);
}
44066 | |
/// Try to turn a v*i64 multiply into PMULDQ/PMULUDQ when the operands are
/// effectively 32-bit values (sign- or zero-extended into the i64 lanes).
static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  EVT VT = N->getValueType(0);

  // Only power-of-two-sized vectors of i64 (so SplitOpsAndApply can chunk).
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||
      VT.getVectorNumElements() < 2 ||
      !isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Signed case: PMULDQ (SSE4.1) multiplies the low 32 bits of each lane
  // sign-extended; valid when both operands have > 32 sign bits.
  if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(N0) > 32 &&
      DAG.ComputeNumSignBits(N1) > 32) {
    auto PMULDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                            ArrayRef<SDValue> Ops) {
      return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops);
    };
    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 },
                            PMULDQBuilder, false);
  }

  // Unsigned case: PMULUDQ; valid when both operands have their high 32 bits
  // known zero.
  APInt Mask = APInt::getHighBitsSet(64, 32);
  if (DAG.MaskedValueIsZero(N0, Mask) && DAG.MaskedValueIsZero(N1, Mask)) {
    auto PMULUDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                             ArrayRef<SDValue> Ops) {
      return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops);
    };
    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 },
                            PMULUDQBuilder, false);
  }

  return SDValue();
}
44108 | |
/// Main ISD::MUL combine: first try the vector narrowing combines above, then
/// decompose scalar i32/i64 multiplies by constants into LEA/shift/add/sub
/// sequences when that is expected to be cheaper than an imul.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (SDValue V = combineMulToPMADDWD(N, DAG, Subtarget))
    return V;

  if (SDValue V = combineMulToPMULDQ(N, DAG, Subtarget))
    return V;

  if (DCI.isBeforeLegalize() && VT.isVector())
    return reduceVMULWidth(N, DAG, Subtarget);

  // Everything below is the scalar constant-multiply decomposition, which can
  // be disabled by flag.
  if (!MulConstantOptimization)
    return SDValue();

  // An imul is usually smaller than the alternative sequence.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  if (VT != MVT::i64 && VT != MVT::i32)
    return SDValue();

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();
  // Powers of two become plain shifts elsewhere.
  if (isPowerOf2_64(C->getZExtValue()))
    return SDValue();

  int64_t SignMulAmt = C->getSExtValue();
  assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
  uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;

  SDLoc DL(N);
  // |amt| of 3, 5, 9 maps directly to a single LEA (MUL_IMM); negate after
  // if the original amount was negative.
  if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
    SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                                 DAG.getConstant(AbsMulAmt, DL, VT));
    if (SignMulAmt < 0)
      NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                           NewMul);

    return NewMul;
  }

  // Try to factor |amt| as 9/5/3 times something.
  uint64_t MulAmt1 = 0;
  uint64_t MulAmt2 = 0;
  if ((AbsMulAmt % 9) == 0) {
    MulAmt1 = 9;
    MulAmt2 = AbsMulAmt / 9;
  } else if ((AbsMulAmt % 5) == 0) {
    MulAmt1 = 5;
    MulAmt2 = AbsMulAmt / 5;
  } else if ((AbsMulAmt % 3) == 0) {
    MulAmt1 = 3;
    MulAmt2 = AbsMulAmt / 3;
  }

  SDValue NewMul;
  // The factorization is usable when the cofactor is a power of two (shift)
  // or itself 3/5/9 (second LEA) — the latter only for non-negative amounts.
  if (MulAmt2 &&
      (isPowerOf2_64(MulAmt2) ||
       (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {

    // Normally emit the LEA first and the shift second (swap so MulAmt1 is
    // the power of two). But keep shift-last only when the sole user is an
    // ADD that could fold this node into another LEA — don't break that.
    if (isPowerOf2_64(MulAmt2) &&
        !(SignMulAmt >= 0 && N->hasOneUse() &&
          N->use_begin()->getOpcode() == ISD::ADD))
      std::swap(MulAmt1, MulAmt2);

    if (isPowerOf2_64(MulAmt1))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                           DAG.getConstant(MulAmt1, DL, VT));

    if (isPowerOf2_64(MulAmt2))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
                           DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                           DAG.getConstant(MulAmt2, DL, VT));

    // Negate the result if the original constant was negative.
    if (SignMulAmt < 0)
      NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                           NewMul);
  } else if (!Subtarget.slowLEA())
    NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);

  if (!NewMul) {
    assert(C->getZExtValue() != 0 &&
           C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
           "Both cases that could cause potential overflows should have "
           "already been handled.");
    if (isPowerOf2_64(AbsMulAmt - 1)) {
      // (mul x, 2^N + 1) => (add (shl x, N), x)
      NewMul = DAG.getNode(
          ISD::ADD, DL, VT, N->getOperand(0),
          DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                      DAG.getConstant(Log2_64(AbsMulAmt - 1), DL,
                                      MVT::i8)));
      // Negate for a negative multiply amount.
      if (SignMulAmt < 0)
        NewMul = DAG.getNode(ISD::SUB, DL, VT,
                             DAG.getConstant(0, DL, VT), NewMul);
    } else if (isPowerOf2_64(AbsMulAmt + 1)) {
      // (mul x, 2^N - 1) => (sub (shl x, N), x); for a negative amount the
      // subtraction order is reversed, folding the negation for free.
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(AbsMulAmt + 1),
                                           DL, MVT::i8));
      if (SignMulAmt < 0)
        NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul);
      else
        NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
    } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) {
      // (mul x, 2^N + 2) => (add (add (shl x, N), x), x)
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(AbsMulAmt - 2),
                                           DL, MVT::i8));
      NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0));
      NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0));
    } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) {
      // (mul x, 2^N - 2) => (sub (sub (shl x, N), x), x)
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(AbsMulAmt + 2),
                                           DL, MVT::i8));
      NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
      NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
    }
  }

  return NewMul;
}
44253 | |
44254 | |
44255 | |
44256 | |
44257 | |
44258 | |
44259 | |
44260 | |
/// Fold (srl/sra (mul (ext X), (ext Y)), 16) -> (ext (mulhu/mulhs X, Y)) for
/// i16-element vectors: shifting the widened product right by 16 extracts
/// exactly the high half that PMULHW/PMULHUW compute directly.
static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  SDLoc DL(N);

  // Gated on SSE4.1 by this combine.
  if (!Subtarget.hasSSE41())
    return SDValue();

  // The shifted value must be a single-use multiply.
  SDValue ShiftOperand = N->getOperand(0);
  if (ShiftOperand.getOpcode() != ISD::MUL || !ShiftOperand.hasOneUse())
    return SDValue();

  // Only vectors whose elements are at least 32 bits (wide enough to hold
  // the full i16 x i16 product).
  EVT VT = N->getValueType(0);
  if (!VT.isVector() || VT.getVectorElementType().getSizeInBits() < 32)
    return SDValue();

  // The shift amount must be a splat of exactly 16.
  APInt ShiftAmt;
  if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), ShiftAmt) ||
      ShiftAmt != 16)
    return SDValue();

  SDValue LHS = ShiftOperand.getOperand(0);
  SDValue RHS = ShiftOperand.getOperand(1);

  // Both multiply operands must be the same kind of extension.
  unsigned ExtOpc = LHS.getOpcode();
  if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
      RHS.getOpcode() != ExtOpc)
    return SDValue();

  // Peek through the extensions to the narrow operands.
  LHS = LHS.getOperand(0);
  RHS = RHS.getOperand(0);

  // The narrow type must be a matching i16-element vector.
  EVT MulVT = LHS.getValueType();
  if (MulVT.getVectorElementType() != MVT::i16 || RHS.getValueType() != MulVT)
    return SDValue();

  // Signed extension pairs with MULHS, unsigned with MULHU.
  unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
  SDValue Mulh = DAG.getNode(Opc, DL, MulVT, LHS, RHS);

  // Re-extend the high half to the original type, matching the shift kind.
  ExtOpc = N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  return DAG.getNode(ExtOpc, DL, VT, Mulh);
}
44311 | |
/// ISD::SHL combines: fold a shift over an AND of a SETCC_CARRY pattern into
/// a single AND with the shifted mask, and turn a vector shift-by-one splat
/// into an add (x+x), which has broader hardware support.
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)).
  // SETCC_CARRY materializes all-zeros or all-ones, so shifting the mask
  // instead of the value is equivalent — but only when the pattern below
  // proves the shifted mask still covers the meaningful bits.
  if (VT.isInteger() && !VT.isVector() &&
      N1C && N0.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue N00 = N0.getOperand(0);
    APInt Mask = N0.getConstantOperandAPInt(1);
    Mask <<= N1C->getAPIntValue();
    bool MaskOK = false;
    if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
      // Direct SETCC_CARRY: always all-zeros/all-ones, safe.
      MaskOK = true;
    } else if (N00.getOpcode() == ISD::SIGN_EXTEND &&
               N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
      // Sign-extension preserves the all-zeros/all-ones property.
      MaskOK = true;
    } else if ((N00.getOpcode() == ISD::ZERO_EXTEND ||
                N00.getOpcode() == ISD::ANY_EXTEND) &&
               N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
      // Zero/any-extension: only safe if the shifted mask fits inside the
      // narrow source width (the extended high bits aren't all-ones).
      MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits());
    }
    if (MaskOK && Mask != 0) {
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));
    }
  }

  // Vector shift left by a splat of 1 -> add x, x. Hardware support for
  // vector shifts is sparser than for vector adds.
  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
    if (auto *N1SplatC = N1BV->getConstantSplatNode()) {
      assert(N0.getValueType().isVector() && "Invalid vector shift type");
      if (N1SplatC->isOne())
        return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
    }

  return SDValue();
}
44370 | |
/// ISD::SRA combine: after the PMULH fold, rewrite (sra (shl X, C1), C2) as a
/// sign_extend_inreg (plus a residual shift) when the shl positions a narrow
/// value at the top of the register — i.e. C1 == Size - 8/16/32.
static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
                                           const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned Size = VT.getSizeInBits();

  if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
    return V;

  // Pattern: scalar only, both shift amounts constant, single-use shl.
  if (VT.isVector() || N1.getOpcode() != ISD::Constant ||
      N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
      N0.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
  APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
  EVT CVT = N1.getValueType();

  // A negative (i.e. out-of-range) SRA amount is left alone.
  if (SarConst.isNegative())
    return SDValue();

  // Find a narrow type SVT such that the shl moved an SVT-sized value to the
  // top of the register; then the shl+sra pair is a sign-extend-in-reg from
  // SVT followed by a smaller residual shift.
  for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
    unsigned ShiftSize = SVT.getSizeInBits();
    if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
      continue;
    SDLoc DL(N);
    SDValue NN =
        DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
    // Residual shift: positive remainder -> SRA, negative -> SHL, zero ->
    // the sign-extend alone suffices.
    SarConst = SarConst - (Size - ShiftSize);
    if (SarConst == 0)
      return NN;
    else if (SarConst.isNegative())
      return DAG.getNode(ISD::SHL, DL, VT, NN,
                         DAG.getConstant(-SarConst, DL, CVT));
    else
      return DAG.getNode(ISD::SRA, DL, VT, NN,
                         DAG.getConstant(SarConst, DL, CVT));
  }
  return SDValue();
}
44427 | |
/// ISD::SRL combine: after the PMULH fold, rewrite (srl (and X, C1), C2) as
/// (and (srl X, C2), C1 >> C2) when the shifted mask fits in a smaller
/// immediate encoding (8- or 32-bit) than the original mask.
static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
    return V;

  // Only after legalization, so other and/shift folds have already run.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // Need a single-use AND feeding the shift.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  auto *ShiftC = dyn_cast<ConstantSDNode>(N1);
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!ShiftC || !AndC)
    return SDValue();

  APInt MaskVal = AndC->getAPIntValue();

  // Leave 8/16/32/64-bit low-bit masks alone: those are cheap as-is
  // (e.g. materialized via zero-extends/movz).
  if (MaskVal.isMask()) {
    unsigned TO = MaskVal.countTrailingOnes();
    if (TO >= 8 && isPowerOf2_32(TO))
      return SDValue();
  }

  // Only transform when the mask crosses an immediate-size boundary:
  // >8 bits down to <=8, or >32 down to <=32 (smaller instruction encoding).
  APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
  unsigned OldMaskSize = MaskVal.getMinSignedBits();
  unsigned NewMaskSize = NewMaskVal.getMinSignedBits();
  if ((OldMaskSize > 8 && NewMaskSize <= 8) ||
      (OldMaskSize > 32 && NewMaskSize <= 32)) {
    // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
    SDLoc DL(N);
    SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
    SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);
  }
  return SDValue();
}
44480 | |
/// For horizontal add/sub and pack nodes, try to commute the op with shuffles
/// on its operands: perform the horizontal op on the un-shuffled sources and
/// apply a single (cheaper) post-shuffle to the result instead.
static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  assert(isHorizOp(Opcode) && "Unexpected hadd/hsub/pack opcode");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT SrcVT = N0.getValueType();

  // Peek through bitcasts, but only when this node is the operand's sole
  // user (otherwise the original shuffles remain live anyway).
  SDValue BC0 =
      N->isOnlyUserOf(N0.getNode()) ? peekThroughOneUseBitcasts(N0) : N0;
  SDValue BC1 =
      N->isOnlyUserOf(N1.getNode()) ? peekThroughOneUseBitcasts(N1) : N1;

  // Case 1: both operands are the lo/hi halves (extract_subvector at index 0
  // and index NumElts) of the SAME 256-bit shuffle. Split the shuffle's
  // 256-bit input directly, do the horizontal op on the halves, and apply the
  // shuffle (rescaled to 4 x 32-bit lanes) to the 128-bit result.
  if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32 &&
      BC0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      BC1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      BC0.getOperand(0) == BC1.getOperand(0) &&
      BC0.getOperand(0).getValueType().is256BitVector() &&
      BC0.getConstantOperandAPInt(1) == 0 &&
      BC1.getConstantOperandAPInt(1) ==
          BC0.getValueType().getVectorNumElements()) {
    SmallVector<SDValue> ShuffleOps;
    SmallVector<int> ShuffleMask, ScaledMask;
    SDValue Vec = peekThroughBitcasts(BC0.getOperand(0));
    if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) {
      resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask);
      // Require a unary, zero-free shuffle whose mask scales to 4 elements.
      if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
          ShuffleOps[0].getValueType().is256BitVector() &&
          scaleShuffleElements(ShuffleMask, 4, ScaledMask)) {
        SDValue Lo, Hi;
        MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
        std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL);
        Lo = DAG.getBitcast(SrcVT, Lo);
        Hi = DAG.getBitcast(SrcVT, Hi);
        SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
        Res = DAG.getBitcast(ShufVT, Res);
        Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Case 2: 128-bit op where one or both operands are shuffles of (at most
  // two) 128-bit sources. Identify the distinct sources (LHS/RHS), run the
  // horizontal op on them directly, and emit one post-shuffle of 64-bit
  // halves reconstructing the original operand order.
  if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {
    SmallVector<SDValue> Ops0, Ops1;
    SmallVector<int> Mask0, Mask1, ScaledMask0, ScaledMask1;
    bool IsShuf0 =
        getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
        scaleShuffleElements(Mask0, 2, ScaledMask0) &&
        all_of(Ops0, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
    bool IsShuf1 =
        getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
        scaleShuffleElements(Mask1, 2, ScaledMask1) &&
        all_of(Ops1, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
    if (IsShuf0 || IsShuf1) {
      // Treat a non-shuffle operand as the identity shuffle of itself.
      if (!IsShuf0) {
        Ops0.assign({BC0});
        ScaledMask0.assign({0, 1});
      }
      if (!IsShuf1) {
        Ops1.assign({BC1});
        ScaledMask1.assign({0, 1});
      }

      SDValue LHS, RHS;
      int PostShuffle[4] = {-1, -1, -1, -1};
      // Map each 64-bit mask element onto LHS (indices 0-1) or RHS (2-3);
      // fail if more than two distinct sources are referenced.
      auto FindShuffleOpAndIdx = [&](int M, int &Idx, ArrayRef<SDValue> Ops) {
        if (M < 0)
          return true; // undef lane — leave PostShuffle entry at -1.
        Idx = M % 2;
        SDValue Src = Ops[M / 2];
        if (!LHS || LHS == Src) {
          LHS = Src;
          return true;
        }
        if (!RHS || RHS == Src) {
          Idx += 2;
          RHS = Src;
          return true;
        }
        return false;
      };
      if (FindShuffleOpAndIdx(ScaledMask0[0], PostShuffle[0], Ops0) &&
          FindShuffleOpAndIdx(ScaledMask0[1], PostShuffle[1], Ops0) &&
          FindShuffleOpAndIdx(ScaledMask1[0], PostShuffle[2], Ops1) &&
          FindShuffleOpAndIdx(ScaledMask1[1], PostShuffle[3], Ops1)) {
        LHS = DAG.getBitcast(SrcVT, LHS);
        RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS);
        MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
        SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
        Res = DAG.getBitcast(ShufVT, Res);
        Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, PostShuffle);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Case 3: 256-bit op (AVX2) where both operands are lane-crossing shuffles
  // of the same pair of 256-bit sources. Do the horizontal op on the sources
  // and apply one combined 4 x 64-bit post-shuffle.
  if (VT.is256BitVector() && Subtarget.hasInt256()) {
    SmallVector<int> Mask0, Mask1;
    SmallVector<SDValue> Ops0, Ops1;
    SmallVector<int, 2> ScaledMask0, ScaledMask1;
    if (getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
        getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
        !Ops0.empty() && !Ops1.empty() &&
        all_of(Ops0,
               [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
        all_of(Ops1,
               [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
        scaleShuffleElements(Mask0, 2, ScaledMask0) &&
        scaleShuffleElements(Mask1, 2, ScaledMask1)) {
      SDValue Op00 = peekThroughBitcasts(Ops0.front());
      SDValue Op10 = peekThroughBitcasts(Ops1.front());
      SDValue Op01 = peekThroughBitcasts(Ops0.back());
      SDValue Op11 = peekThroughBitcasts(Ops1.back());
      // If operand 1 references the sources in swapped order, commute its
      // mask so both sides agree on (Op00, Op01).
      if ((Op00 == Op11) && (Op01 == Op10)) {
        std::swap(Op10, Op11);
        ShuffleVectorSDNode::commuteMask(ScaledMask1);
      }
      if ((Op00 == Op10) && (Op01 == Op11)) {
        // Map accounts for the horizontal op interleaving results of its two
        // operands per 128-bit lane.
        const int Map[4] = {0, 2, 1, 3};
        SmallVector<int, 4> ShuffleMask(
            {Map[ScaledMask0[0]], Map[ScaledMask1[0]], Map[ScaledMask0[1]],
             Map[ScaledMask1[1]]});
        MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
        SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00),
                                  DAG.getBitcast(SrcVT, Op01));
        Res = DAG.getBitcast(ShufVT, Res);
        Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  return SDValue();
}
44629 | |
// Combine X86ISD::PACKSS/PACKUS nodes: constant-fold them, fold shuffles
// through them, and recognize truncate/extend patterns that make the pack
// redundant.
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
         "Unexpected pack opcode");

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned NumDstElts = VT.getVectorNumElements();
  unsigned DstBitsPerElt = VT.getScalarSizeInBits();
  // PACK halves the element width: each source element is twice as wide.
  unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
  assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt &&
         N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
         "Unexpected PACKSS/PACKUS input type");

  bool IsSigned = (X86ISD::PACKSS == Opcode);

  // Constant folding: if both operands are (possibly undef) constant build
  // vectors, saturate each element at compile time. Only when we are the
  // sole user of the operands, so the originals become dead.
  APInt UndefElts0, UndefElts1;
  SmallVector<APInt, 32> EltBits0, EltBits1;
  if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) &&
      (N1.isUndef() || N->isOnlyUserOf(N1.getNode())) &&
      getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
      getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
    unsigned NumLanes = VT.getSizeInBits() / 128;
    unsigned NumSrcElts = NumDstElts / 2;
    unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
    unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;

    APInt Undefs(NumDstElts, 0);
    SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
    // PACK works per 128-bit lane: the low half of each destination lane is
    // taken from N0's lane, the high half from N1's lane.
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
        unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
        auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0);
        auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0);

        if (UndefElts[SrcIdx]) {
          Undefs.setBit(Lane * NumDstEltsPerLane + Elt);
          continue;
        }

        APInt &Val = EltBits[SrcIdx];
        if (IsSigned) {
          // PACKSS: signed-saturate the value to the destination width.
          if (Val.isSignedIntN(DstBitsPerElt))
            Val = Val.trunc(DstBitsPerElt);
          else if (Val.isNegative())
            Val = APInt::getSignedMinValue(DstBitsPerElt);
          else
            Val = APInt::getSignedMaxValue(DstBitsPerElt);
        } else {
          // PACKUS: unsigned-saturate; negative (signed) inputs clamp to zero.
          if (Val.isIntN(DstBitsPerElt))
            Val = Val.trunc(DstBitsPerElt);
          else if (Val.isNegative())
            Val = APInt::getNullValue(DstBitsPerElt);
          else
            Val = APInt::getAllOnesValue(DstBitsPerElt);
        }
        Bits[Lane * NumDstEltsPerLane + Elt] = Val;
      }
    }

    return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
  }

  // Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()).
  if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
    return V;

  // With AVX512: PACK*(TRUNCATE(v8i32 X), UNDEF) -> VTRUNC/TRUNCATE, provided
  // the truncation is lossless (enough sign bits for PACKSS, zero high bits
  // for PACKUS).
  if (Subtarget.hasAVX512() &&
      N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
      N0.getOperand(0).getValueType() == MVT::v8i32) {
    if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
        (!IsSigned &&
         DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
      if (Subtarget.hasVLX())
        return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));

      // Without VLX, widen to 512 bits (with undef) so the truncate is legal.
      SDLoc dl(N);
      SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
                                   N0.getOperand(0), DAG.getUNDEF(MVT::v8i32));
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);
    }
  }

  // PACK(EXT(X), EXT(Y)) -> CONCAT(X, Y): the pack exactly undoes a matching
  // sign/zero extension of 64-bit subvectors.
  if (VT.is128BitVector()) {
    unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue Src0, Src1;
    if (N0.getOpcode() == ExtOpc &&
        N0.getOperand(0).getValueType().is64BitVector() &&
        N0.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
      Src0 = N0.getOperand(0);
    }
    if (N1.getOpcode() == ExtOpc &&
        N1.getOperand(0).getValueType().is64BitVector() &&
        N1.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
      Src1 = N1.getOperand(0);
    }
    if ((Src0 || N0.isUndef()) && (Src1 || N1.isUndef())) {
      assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)");
      Src0 = Src0 ? Src0 : DAG.getUNDEF(Src1.getValueType());
      Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
    }
  }

  // Attempt to combine the pack as part of a shuffle chain.
  SDValue Op(N, 0);
  if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
    return Res;

  return SDValue();
}
44755 | |
// Combine X86 horizontal add/sub nodes (HADD/HSUB/FHADD/FHSUB). When the
// target prefers to avoid horizontal ops, try to share one horizontal op
// between several users via shuffles instead of emitting multiple of them.
static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
  assert((X86ISD::HADD == N->getOpcode() || X86ISD::FHADD == N->getOpcode() ||
          X86ISD::HSUB == N->getOpcode() || X86ISD::FHSUB == N->getOpcode()) &&
         "Unexpected horizontal add/sub opcode");

  if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
    MVT VT = N->getSimpleValueType(0);
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // HOP(X,X): if another user already computes HOP(X,Y) or HOP(Y,X), reuse
    // that result and splat the half that corresponds to X.
    if (VT.is128BitVector() && LHS == RHS) {
      for (SDNode *User : LHS->uses()) {
        if (User != N && User->getOpcode() == N->getOpcode()) {
          MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
          if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
            // X is the user's first operand: our result is its low half.
            return DAG.getBitcast(
                VT,
                DAG.getVectorShuffle(ShufVT, SDLoc(N),
                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
                                     DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
          }
          if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
            // X is the user's second operand: our result is its high half.
            return DAG.getBitcast(
                VT,
                DAG.getVectorShuffle(ShufVT, SDLoc(N),
                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
                                     DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
          }
        }
      }
    }

    // HOP(HOP'(X,X'),HOP'(Y,Y')): merge the two inner ops (which agree per
    // operand, modulo undef) into one and split its result with two PSHUFDs.
    if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
        LHS.getOpcode() == RHS.getOpcode() &&
        LHS.getValueType() == RHS.getValueType()) {
      SDValue LHS0 = LHS.getOperand(0);
      SDValue RHS0 = LHS.getOperand(1);
      SDValue LHS1 = RHS.getOperand(0);
      SDValue RHS1 = RHS.getOperand(1);
      if ((LHS0 == RHS0 || LHS0.isUndef() || RHS0.isUndef()) &&
          (LHS1 == RHS1 || LHS1.isUndef() || RHS1.isUndef())) {
        SDLoc DL(N);
        // Build the single merged inner op, preferring the non-undef operand.
        SDValue Res = DAG.getNode(LHS.getOpcode(), DL, LHS.getValueType(),
                                  LHS0.isUndef() ? RHS0 : LHS0,
                                  LHS1.isUndef() ? RHS1 : LHS1);
        MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
        Res = DAG.getBitcast(ShufVT, Res);
        SDValue NewLHS =
            DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
                        getV4X86ShuffleImm8ForMask({0, 1, 0, 1}, DL, DAG));
        SDValue NewRHS =
            DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
                        getV4X86ShuffleImm8ForMask({2, 3, 2, 3}, DL, DAG));
        // Rewrite both inner ops in place, then revisit this node.
        DAG.ReplaceAllUsesOfValueWith(LHS, DAG.getBitcast(VT, NewLHS));
        DAG.ReplaceAllUsesOfValueWith(RHS, DAG.getBitcast(VT, NewRHS));
        return SDValue(N, 0);
      }
    }
  }

  // Try to fold HOP(SHUFFLE(),SHUFFLE()) -> SHUFFLE(HOP()).
  if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
    return V;

  return SDValue();
}
44826 | |
// Combine X86 variable-amount vector shifts (VSHL/VSRA/VSRL), whose shift
// amount comes from a vector operand.
static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget &Subtarget) {
  assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
          X86ISD::VSRL == N->getOpcode()) &&
         "Unexpected shift opcode");
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Shift of zero -> zero.
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return DAG.getConstant(0, SDLoc(N), VT);

  // Constant shift amount: switch to the immediate-shift form. Only the
  // bottom 64 bits of the amount operand are read.
  APInt UndefElts;
  SmallVector<APInt, 32> EltBits;
  if (getTargetConstantBitsFromNode(N1, 64, UndefElts, EltBits, true, false)) {
    unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false);
    return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0,
                                      EltBits[0].getZExtValue(), DAG);
  }

  // Simplify based on which result elements are actually demanded.
  APInt KnownUndef, KnownZero;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
  if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
                                     KnownZero, DCI))
    return SDValue(N, 0);

  return SDValue();
}
44859 | |
// Combine X86 immediate vector shifts (VSHLI/VSRAI/VSRLI).
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode ||
          X86ISD::VSRLI == Opcode) &&
         "Unexpected shift opcode");
  bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
  assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
         "Unexpected value type");
  assert(N->getOperand(1).getValueType() == MVT::i8 &&
         "Unexpected shift amount type");

  // (shift undef, X) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Out-of-range logical shifts are guaranteed to be zero; out-of-range
  // arithmetic shifts splat the sign bit, so clamp to NumBitsPerElt - 1.
  unsigned ShiftVal = N->getConstantOperandVal(1);
  if (ShiftVal >= NumBitsPerElt) {
    if (LogicalShift)
      return DAG.getConstant(0, SDLoc(N), VT);
    ShiftVal = NumBitsPerElt - 1;
  }

  // (shift X, 0) -> X
  if (!ShiftVal)
    return N0;

  // (shift 0, C) -> 0
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    // N0 is all zeros or undef. We guarantee that the bits shifted into the
    // result are all zeros, not undef.
    return DAG.getConstant(0, SDLoc(N), VT);

  // (VSRAI -1, C) -> -1
  if (!LogicalShift && ISD::isBuildVectorAllOnes(N0.getNode()))
    // N0 is all ones or undef. We guarantee that the bits shifted into the
    // result are all ones, not undef.
    return DAG.getConstant(-1, SDLoc(N), VT);

  // (shift (shift X, C2), C1) -> (shift X, (C1 + C2))
  if (Opcode == N0.getOpcode()) {
    unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    unsigned NewShiftVal = ShiftVal + ShiftVal2;
    if (NewShiftVal >= NumBitsPerElt) {
      // Same out-of-range handling as above for the combined amount.
      if (LogicalShift)
        return DAG.getConstant(0, SDLoc(N), VT);
      NewShiftVal = NumBitsPerElt - 1;
    }
    return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0),
                       DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
  }

  // A byte-aligned logical shift is a byte shuffle of the vector with zeros,
  // so let the shuffle combiner have a go at it.
  if (LogicalShift && (ShiftVal % 8) == 0) {
    SDValue Op(N, 0);
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;
  }

  // Constant folding: shift each constant element directly (only when we are
  // the sole user so the original constant becomes dead).
  APInt UndefElts;
  SmallVector<APInt, 32> EltBits;
  if (N->isOnlyUserOf(N0.getNode()) &&
      getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
    assert(EltBits.size() == VT.getVectorNumElements() &&
           "Unexpected shift value type");
    // Undef elements are folded to zero: the shift always shifts in known
    // bits, so the folded result must not contain undefs.
    for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
      APInt &Elt = EltBits[i];
      if (UndefElts[i])
        Elt = 0;
      else if (X86ISD::VSHLI == Opcode)
        Elt <<= ShiftVal;
      else if (X86ISD::VSRAI == Opcode)
        Elt.ashrInPlace(ShiftVal);
      else
        Elt.lshrInPlace(ShiftVal);
    }

    UndefElts = 0;
    return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
  }

  // Finally, simplify based on the demanded bits of the result.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedBits(SDValue(N, 0),
                               APInt::getAllOnesValue(NumBitsPerElt), DCI))
    return SDValue(N, 0);

  return SDValue();
}
44960 | |
// Combine X86ISD::PINSRB/PINSRW and ISD::INSERT_VECTOR_ELT nodes.
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) ||
          (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16) ||
          N->getOpcode() == ISD::INSERT_VECTOR_ELT) &&
         "Unexpected vector insertion");

  if (N->getOpcode() == X86ISD::PINSRB || N->getOpcode() == X86ISD::PINSRW) {
    // Simplify the node based on the demanded bits of its scalar element.
    unsigned NumBitsPerElt = VT.getScalarSizeInBits();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (TLI.SimplifyDemandedBits(SDValue(N, 0),
                                 APInt::getAllOnesValue(NumBitsPerElt), DCI))
      return SDValue(N, 0);
  }

  // Attempt to combine insertion patterns into a shuffle, once types are
  // final (after DAG legalization).
  if (VT.isSimple() && DCI.isAfterLegalizeDAG()) {
    SDValue Op(N, 0);
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;
  }

  return SDValue();
}
44987 | |
44988 | |
44989 | |
44990 | |
// Recognize AND/OR of two SETCCs of the same scalar FP compare whose
// condition codes together express an (un)ordered equality test, and lower
// the pair to a single SSE FSETCC mask compare feeding the integer use.
static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &Subtarget) {
  unsigned opcode;

  // Requires SSE2 so the scalar FSETCC compares are available.
  if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue CMP0 = N0.getOperand(1);
    SDValue CMP1 = N1.getOperand(1);
    SDLoc DL(N);

    // Both SETCCs must refer to the same FCMP node.
    if (CMP0.getOpcode() != X86ISD::FCMP || CMP0 != CMP1)
      return SDValue();

    SDValue CMP00 = CMP0->getOperand(0);
    SDValue CMP01 = CMP0->getOperand(1);
    EVT VT = CMP00.getValueType();

    if (VT == MVT::f32 || VT == MVT::f64 ||
        (VT == MVT::f16 && Subtarget.hasFP16())) {
      bool ExpectingFlags = false;
      // Bail out if any user wants the node as a flags/condition input
      // (branches/selects and anything unrecognized); only plain value uses
      // (copies/extends) are safe to replace with a mask-derived integer.
      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
           !ExpectingFlags && UI != UE; ++UI)
        switch (UI->getOpcode()) {
        default:
        case ISD::BR_CC:
        case ISD::BRCOND:
        case ISD::SELECT:
          ExpectingFlags = true;
          break;
        case ISD::CopyToReg:
        case ISD::SIGN_EXTEND:
        case ISD::ZERO_EXTEND:
        case ISD::ANY_EXTEND:
          break;
        }

      if (!ExpectingFlags) {
        enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
        enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);

        // Canonicalize so that cc0 holds the E/NE condition.
        if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
          X86::CondCode tmp = cc0;
          cc0 = cc1;
          cc1 = tmp;
        }

        // AND(E, NP) / OR(NE, P) are the NaN-aware equality patterns.
        if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
            (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
          // 0 / 4 are the SSE compare immediates for EQ / NEQ — presumably
          // matching X86 SSE CC encoding; see printSSECC for the mapping.
          unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
          if (Subtarget.hasAVX512()) {
            SDValue FSetCC =
                DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
                            DAG.getTargetConstant(x86cc, DL, MVT::i8));
            // Insert into a zero v16i1 so the bitcast to i16 has known-zero
            // upper bits before truncating to the result type.
            SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
                                      DAG.getConstant(0, DL, MVT::v16i1),
                                      FSetCC, DAG.getIntPtrConstant(0, DL));
            return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL,
                                      N->getSimpleValueType(0));
          }
          // Non-AVX512: FSETCC yields an all-ones/all-zeros FP mask.
          SDValue OnesOrZeroesF =
              DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00,
                          CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8));

          bool is64BitFP = (CMP00.getValueType() == MVT::f64);
          MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;

          if (is64BitFP && !Subtarget.is64Bit()) {
            // On a 32-bit target we can't bitcast f64 to i64 cheaply.
            // Since the mask is all-ones or all-zeros, widen to a vector,
            // reinterpret as v4f32 and extract the low 32 bits instead.
            SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
                                           OnesOrZeroesF);
            SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
            OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
                                        Vector32, DAG.getIntPtrConstant(0, DL));
            IntVT = MVT::i32;
          }

          // Reduce the mask to a single 0/1 bit in an i8.
          SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
          SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
                                      DAG.getConstant(1, DL, IntVT));
          SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                              ANDed);
          return OneBitOfTruth;
        }
      }
    }
  }
  return SDValue();
}
45093 | |
45094 | |
// Fold AND(X, NOT(Y)) (in either operand order) into X86ISD::ANDNP, also
// looking through a VBROADCAST whose source is inverted.
static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::AND);

  MVT VT = N->getSimpleValueType(0);
  // ANDNP only exists for 128/256/512-bit vectors.
  if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
    return SDValue();

  SDValue X, Y;
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // If V is NOT(W) (directly or a broadcast of one), return the un-inverted
  // value; otherwise return an empty SDValue.
  auto GetNot = [&VT, &DAG](SDValue V) {
    // Direct V = NOT(W) detection.
    if (SDValue Not = IsNOT(V, DAG))
      return Not;
    // BROADCAST(NOT(W)) -> a broadcast of the un-inverted source.
    if (V.getOpcode() == X86ISD::VBROADCAST) {
      SDValue Src = V.getOperand(0);
      EVT SrcVT = Src.getValueType();
      if (!SrcVT.isVector())
        return SDValue();
      if (SDValue Not = IsNOT(Src, DAG))
        return DAG.getNode(X86ISD::VBROADCAST, SDLoc(V), VT,
                           DAG.getBitcast(SrcVT, Not));
    }
    return SDValue();
  };

  if (SDValue Not = GetNot(N0)) {
    X = Not;
    Y = N1;
  } else if (SDValue Not = GetNot(N1)) {
    X = Not;
    Y = N0;
  } else
    return SDValue();

  X = DAG.getBitcast(VT, X);
  Y = DAG.getBitcast(VT, Y);
  // ANDNP(X, Y) computes ~X & Y, matching AND(NOT(X), Y).
  return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
}
45136 | |
45137 | |
45138 | |
45139 | |
45140 | |
45141 | |
45142 | |
45143 | |
45144 | |
45145 | |
// Recursively widen a chain of AND/OR/XOR over truncated values so the logic
// happens directly in the wide type VT; each operand must itself be
// promotable, a truncate from VT, or (RHS only) a constant build vector.
// Returns the rebuilt wide logic node, or an empty SDValue on failure.
static SDValue PromoteMaskArithmetic(SDNode *N, EVT VT, SelectionDAG &DAG,
                                     unsigned Depth) {
  // Bound the recursion.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Only bitwise logic ops can be promoted this way.
  if (N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND &&
      N->getOpcode() != ISD::OR)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDLoc DL(N);

  // The wide operation must be legal (or promotable) for the target.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrPromote(N->getOpcode(), VT))
    return SDValue();

  // Promote the LHS: recursively, or by peeling a truncate from VT.
  if (SDValue NN0 = PromoteMaskArithmetic(N0.getNode(), VT, DAG, Depth + 1))
    N0 = NN0;
  else {
    // The left side has to be a trunc.
    if (N0.getOpcode() != ISD::TRUNCATE)
      return SDValue();

    // The truncated value must originate in the wide type.
    if (N0.getOperand(0).getValueType() != VT)
      return SDValue();

    N0 = N0.getOperand(0);
  }

  // Promote the RHS: recursively, via a matching truncate, or by
  // zero-extending a constant build vector.
  if (SDValue NN1 = PromoteMaskArithmetic(N1.getNode(), VT, DAG, Depth + 1))
    N1 = NN1;
  else {
    // The right side has to be a trunc from VT or a foldable constant.
    bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
                    N1.getOperand(0).getValueType() == VT;
    if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
      return SDValue();

    if (RHSTrunc)
      N1 = N1.getOperand(0);
    else
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
  }

  return DAG.getNode(N->getOpcode(), DL, VT, N0, N1);
}
45195 | |
45196 | |
45197 | |
45198 | |
45199 | |
45200 | |
45201 | |
// Entry point for mask-arithmetic promotion: given (ext (logic ...)), try to
// perform the logic chain directly in the extended type, then re-establish
// the extension semantics (zero/sign bits) on the wide result.
static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "Expected vector type");

  SDLoc DL(N);
  assert((N->getOpcode() == ISD::ANY_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND ||
          N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");

  SDValue Narrow = N->getOperand(0);
  EVT NarrowVT = Narrow.getValueType();

  // Try to promote the narrow logic chain into VT.
  SDValue Op = PromoteMaskArithmetic(Narrow.getNode(), VT, DAG, 0);
  if (!Op)
    return SDValue();
  switch (N->getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::ANY_EXTEND:
    // High bits are unspecified for ANY_EXTEND: the wide result is fine as-is.
    return Op;
  case ISD::ZERO_EXTEND:
    // Clear the bits above the narrow width.
    return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
  case ISD::SIGN_EXTEND:
    // Re-sign-extend from the narrow width inside the wide type.
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
                       Op, DAG.getValueType(NarrowVT));
  }
}
45230 | |
45231 | static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) { |
45232 | unsigned FPOpcode; |
45233 | switch (Opcode) { |
45234 | default: llvm_unreachable("Unexpected input node for FP logic conversion"); |
45235 | case ISD::AND: FPOpcode = X86ISD::FAND; break; |
45236 | case ISD::OR: FPOpcode = X86ISD::FOR; break; |
45237 | case ISD::XOR: FPOpcode = X86ISD::FXOR; break; |
45238 | } |
45239 | return FPOpcode; |
45240 | } |
45241 | |
45242 | |
45243 | |
45244 | |
// If both inputs of an integer logic op are bitcasts from the same supported
// scalar FP type, perform the logic in the FP domain instead:
// logic(bitcast(X), bitcast(Y)) -> bitcast(flogic(X, Y)).
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDLoc DL(N);

  if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);
  EVT N00Type = N00.getValueType();
  EVT N10Type = N10.getValueType();

  // Both sources must be the same FP type, and the subtarget must provide
  // the matching FP logic ops (SSE1: f32, SSE2: f64, FP16: f16).
  if (N00Type != N10Type || !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
                              (Subtarget.hasSSE2() && N00Type == MVT::f64) ||
                              (Subtarget.hasFP16() && N00Type == MVT::f16)))
    return SDValue();

  unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
  SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
  return DAG.getBitcast(VT, FPLogic);
}
45270 | |
45271 | |
45272 | |
// Fold bitop(movmsk(X), movmsk(Y)) -> movmsk(bitop(X, Y)): performing the
// logic in the vector domain removes one of the two MOVMSKs.
static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) {
  unsigned Opc = N->getOpcode();
  assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) &&
         "Unexpected bit opcode");

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Both operands must be single-use MOVMSKs so they become dead after the
  // fold.
  if (N0.getOpcode() != X86ISD::MOVMSK || !N0.hasOneUse() ||
      N1.getOpcode() != X86ISD::MOVMSK || !N1.hasOneUse())
    return SDValue();

  SDValue Vec0 = N0.getOperand(0);
  SDValue Vec1 = N1.getOperand(0);
  EVT VecVT0 = Vec0.getValueType();
  EVT VecVT1 = Vec1.getValueType();

  // The vectors must have matching total size and element size so each
  // MOVMSK extracts the same number of bits from the same positions.
  if (VecVT0.getSizeInBits() != VecVT1.getSizeInBits() ||
      VecVT0.getScalarSizeInBits() != VecVT1.getScalarSizeInBits())
    return SDValue();

  SDLoc DL(N);
  // For FP vectors, use the equivalent FP logic opcode to stay in-domain.
  unsigned VecOpc =
      VecVT0.isFloatingPoint() ? convertIntLogicToFPLogicOpcode(Opc) : Opc;
  SDValue Result =
      DAG.getNode(VecOpc, DL, VecVT0, Vec0, DAG.getBitcast(VecVT0, Vec1));
  return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
}
45304 | |
45305 | |
45306 | |
45307 | |
// If a vector AND with a splatted low-bits mask is applied to a value whose
// elements are all sign bits, replace it with a logical shift right:
// and(X, mask) -> vsrli(X, width - popcount(mask)).
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
  SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
  EVT VT0 = Op0.getValueType();
  EVT VT1 = Op1.getValueType();

  if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
    return SDValue();

  // The RHS must be a splat of a contiguous low-bits mask (e.g. 0x00FF).
  APInt SplatVal;
  if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
      !SplatVal.isMask())
    return SDValue();

  // Don't break an AND(NOT(X), Y) pattern that could become ANDN.
  if (isBitwiseNot(Op0))
    return SDValue();

  if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
    return SDValue();

  // Every element must consist entirely of sign bits (all-ones or all-zeros)
  // so that a logical shift right reproduces exactly the masked value.
  unsigned EltBitWidth = VT0.getScalarSizeInBits();
  if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
    return SDValue();

  SDLoc DL(N);
  unsigned ShiftVal = SplatVal.countTrailingOnes();
  SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
  SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
  return DAG.getBitcast(N->getValueType(0), Shift);
}
45340 | |
45341 | |
45342 | |
45343 | static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) { |
45344 | if (Ld->isIndexed()) |
45345 | return SDValue(); |
45346 | |
45347 | SDValue Base = Ld->getBasePtr(); |
45348 | |
45349 | if (Base.getOpcode() != ISD::ADD) |
45350 | return SDValue(); |
45351 | |
45352 | SDValue ShiftedIndex = Base.getOperand(0); |
45353 | |
45354 | if (ShiftedIndex.getOpcode() != ISD::SHL) |
45355 | return SDValue(); |
45356 | |
45357 | return ShiftedIndex.getOperand(0); |
45358 | |
45359 | } |
45360 | |
45361 | static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { |
45362 | if (Subtarget.hasBMI2() && VT.isScalarInteger()) { |
45363 | switch (VT.getSizeInBits()) { |
45364 | default: return false; |
45365 | case 64: return Subtarget.is64Bit() ? true : false; |
45366 | case 32: return true; |
45367 | } |
45368 | } |
45369 | return false; |
45370 | } |
45371 | |
45372 | |
45373 | |
45374 | |
45375 | |
45376 | |
45377 | |
45378 | |
45379 | |
45380 | |
45381 | |
// Look for "and X, (load table[idx])" where 'table' is a constant global
// array whose entry j equals (1 << j) - 1, and rewrite it as
// "and X, (srl all-ones, width - idx)" — a form the selector can turn into a
// single BZHI instruction.
static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT VT = Node->getSimpleValueType(0);
  SDLoc dl(Node);

  // Only worthwhile on BZHI-capable targets with a supported scalar width.
  if (!hasBZHI(Subtarget, VT))
    return SDValue();

  // Either operand of the AND may be the table load.
  for (unsigned i = 0; i < 2; i++) {
    SDValue N = Node->getOperand(i);
    LoadSDNode *Ld = dyn_cast<LoadSDNode>(N.getNode());

    // Bail out if this operand is not a load.
    if (!Ld)
      return SDValue();

    const Value *MemOp = Ld->getMemOperand()->getValue();

    // We need the underlying IR value to inspect the global initializer.
    if (!MemOp)
      return SDValue();

    // The load must be a GEP into a constant global with a definitive
    // initializer so we can read the table contents.
    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
        if (GV->isConstant() && GV->hasDefinitiveInitializer()) {

          Constant *Init = GV->getInitializer();
          Type *Ty = Init->getType();
          // Entries must be integers of the AND's width, and the table must
          // not be longer than the element bit width (larger indices could
          // not hold a valid (1 << j) - 1 mask).
          if (!isa<ConstantDataArray>(Init) ||
              !Ty->getArrayElementType()->isIntegerTy() ||
              Ty->getArrayElementType()->getScalarSizeInBits() !=
                  VT.getSizeInBits() ||
              Ty->getArrayNumElements() >
                  Ty->getArrayElementType()->getScalarSizeInBits())
            continue;

          // Verify that every entry j equals (1 << j) - 1.
          uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
          bool ConstantsMatch = true;
          for (uint64_t j = 0; j < ArrayElementCount; j++) {
            auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j));
            if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) {
              ConstantsMatch = false;
              break;
            }
          }
          if (!ConstantsMatch)
            continue;

          // Do the transformation (for 32-bit type):
          //   (and (load table[idx]), inp)
          //     -> (and inp, (srl 0xFFFFFFFF, (sub 32, idx)))
          // which instruction selection replaces with one BZHI.
          SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);
          SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32);

          // Recover the index value from the load's address computation.
          SDValue Index = getIndexFromUnindexedLoad(Ld);
          if (!Index)
            return SDValue();
          Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32);

          SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index);
          Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub);

          SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
          SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);

          return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);
        }
      }
    }
  }
  return SDValue();
}
45458 | |
45459 | |
45460 | |
45461 | |
45462 | |
45463 | |
// Fold a scalar AND of a bitcast vXi1 concat with a low-bits mask:
// and(bitcast(concat(setcc, ...)), mask) where the mask covers exactly the
// setcc's bits becomes bitcast(concat(setcc, zeros...)) — making the scalar
// AND redundant.
static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
                                             const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);

  // Only handle AND with a constant RHS mask.
  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  // Vector ANDs are handled by other combines.
  assert(!VT.isVector() && "Expected scalar VT!");

  // The LHS must be a single-use bitcast of a single-use value.
  if (N->getOperand(0).getOpcode() != ISD::BITCAST ||
      !N->getOperand(0).hasOneUse() ||
      !N->getOperand(0).getOperand(0).hasOneUse())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Src = N->getOperand(0).getOperand(0);
  // The bitcast source must be a legal vXi1 mask vector.
  EVT SrcVT = Src.getValueType();
  if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::i1 ||
      !TLI.isTypeLegal(SrcVT))
    return SDValue();

  if (Src.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  // Only the first subvector is allowed to carry meaningful bits.
  SDValue SubVec = Src.getOperand(0);
  EVT SubVecVT = SubVec.getValueType();

  // The first subvector must be a SETCC of a legal type, and the AND mask
  // must select exactly its bits (a low-bits mask of its element count).
  if (SubVec.getOpcode() != ISD::SETCC || !TLI.isTypeLegal(SubVecVT) ||
      !N->getConstantOperandAPInt(1).isMask(SubVecVT.getVectorNumElements()))
    return SDValue();

  // The compared type must be legal and must compare into a mask register
  // (requires VLX for sub-512-bit vectors).
  EVT SetccVT = SubVec.getOperand(0).getValueType();
  if (!TLI.isTypeLegal(SetccVT) ||
      !(Subtarget.hasVLX() || SetccVT.is512BitVector()))
    return SDValue();

  // Byte/word element compares additionally require BWI.
  if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32))
    return SDValue();

  // Rebuild the concat with explicit zero subvectors in every slot but the
  // first; the AND mask is then a no-op and can be dropped.
  SDLoc dl(N);
  SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
                              DAG.getConstant(0, dl, SubVecVT));
  Ops[0] = SubVec;
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
                               Ops);
  return DAG.getBitcast(VT, Concat);
}
45522 | |
/// Combine an ISD::AND node: try a sequence of target-specific folds and
/// return the replacement value, or SDValue() if no fold applies.
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // If this is SSE1 only (no SSE2), convert v4i32 AND to FAND on v4f32 to
  // avoid scalarization - only f32 vectors are legal on SSE1.
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
    return DAG.getBitcast(
        MVT::v4i32, DAG.getNode(X86ISD::FAND, SDLoc(N), MVT::v4f32,
                                DAG.getBitcast(MVT::v4f32, N->getOperand(0)),
                                DAG.getBitcast(MVT::v4f32, N->getOperand(1))));
  }

  // Use a 32-bit AND + zext when the upper 32 bits of either operand are
  // known zero; this avoids a REX-prefixed 64-bit AND. Skip constant RHS
  // (that case is handled elsewhere / stays as an immediate AND).
  if (VT == MVT::i64 && Subtarget.is64Bit() &&
      !isa<ConstantSDNode>(N->getOperand(1))) {
    APInt HiMask = APInt::getHighBitsSet(64, 32);
    if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) ||
        DAG.MaskedValueIsZero(N->getOperand(0), HiMask)) {
      SDLoc dl(N);
      SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(0));
      SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(1));
      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64,
                         DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS));
    }
  }

  // Match an all-of bool scalar reduction (and of i1 extracts from one
  // vector) into a bitcast/MOVMSK of the mask followed by a compare against
  // the full partial-reduction mask.
  if (VT == MVT::i1) {
    SmallVector<SDValue, 2> SrcOps;
    SmallVector<APInt, 2> SrcPartials;
    if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps, &SrcPartials) &&
        SrcOps.size() == 1) {
      SDLoc dl(N);
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
      EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
      // If the movmsk-style fold failed but the vXi1 source type is legal,
      // a plain bitcast to the integer mask type is sufficient.
      if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
        Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
      if (Mask) {
        assert(SrcPartials[0].getBitWidth() == NumElts &&
               "Unexpected partial reduction mask");
        // All-of: (Mask & PartialBits) == PartialBits.
        SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
        Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
        return DAG.getSetCC(dl, MVT::i1, Mask, PartialBits, ISD::SETEQ);
      }
    }
  }

  if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
    return V;

  if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
    return R;

  // The remaining folds only run after operation legalization.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
    return R;

  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
    return FPLogic;

  if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
    return R;

  if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
    return ShiftRight;

  if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
    return R;

  // Attempt to recursively combine a bitmask AND with shuffles.
  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
    SDValue Op(N, 0);
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;
  }

  // Attempt to combine:
  //   and(extract_vector_elt(SrcVec, Idx), ConstantMask)
  // -> extract_vector_elt(byte-masked-shuffle(SrcVec), Idx)
  // when every mask byte is all-zeros or all-ones.
  if ((VT.getScalarSizeInBits() % 8) == 0 &&
      N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isa<ConstantSDNode>(N->getOperand(0).getOperand(1))) {
    SDValue BitMask = N->getOperand(1);
    SDValue SrcVec = N->getOperand(0).getOperand(0);
    EVT SrcVecVT = SrcVec.getValueType();

    // Decode the mask as per-byte constants; every byte must be 0x00 or 0xFF
    // so it can be expressed as a zero/identity byte shuffle.
    APInt UndefElts;
    SmallVector<APInt, 64> EltBits;
    if (VT == SrcVecVT.getScalarType() &&
        N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
        getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
        llvm::all_of(EltBits, [](const APInt &M) {
          return M.isNullValue() || M.isAllOnesValue();
        })) {
      unsigned NumElts = SrcVecVT.getVectorNumElements();
      unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
      unsigned Idx = N->getOperand(0).getConstantOperandVal(1);

      // Build a byte shuffle mask: only the bytes of element Idx matter;
      // a zero mask byte selects zero, an all-ones byte keeps the source.
      SmallVector<int, 16> ShuffleMask(NumElts * Scale, SM_SentinelUndef);
      for (unsigned i = 0; i != Scale; ++i) {
        if (UndefElts[i])
          continue;
        int VecIdx = Scale * Idx + i;
        ShuffleMask[VecIdx] =
            EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx;
      }

      if (SDValue Shuffle = combineX86ShufflesRecursively(
              {SrcVec}, 0, SrcVec, ShuffleMask, {}, 1,
              X86::MaxShuffleCombineDepth,
              false, true,
              true, DAG, Subtarget))
        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
                           N->getOperand(0).getOperand(1));
    }
  }

  return SDValue();
}
45648 | |
45649 | |
/// Try to fold: (or (and X, C), (and Y, ~C)) - i.e. a constant bit-select -
/// into VPTERNLOG (AVX512) or an OR(X, ANDNP) form that later matches
/// PCMOV/PBLENDVB style selection.
static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");

  MVT VT = N->getSimpleValueType(0);
  if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(N->getOperand(0));
  SDValue N1 = peekThroughBitcasts(N->getOperand(1));
  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
    return SDValue();

  // With VPTERNLOG (or XOP's PCMOV) the whole select is one instruction, so
  // accept the fold even when the mask constants have other uses; otherwise
  // require both constants to be single-use so we don't increase const pool
  // pressure.
  bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) ||
                      Subtarget.hasVLX();
  if (!(Subtarget.hasXOP() || UseVPTERNLOG ||
        !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
    return SDValue();

  // Extract the per-byte constant bits of both masks (no undef/zero widening).
  APInt UndefElts0, UndefElts1;
  SmallVector<APInt, 32> EltBits0, EltBits1;
  if (!getTargetConstantBitsFromNode(N0.getOperand(1), 8, UndefElts0, EltBits0,
                                     false, false))
    return SDValue();
  if (!getTargetConstantBitsFromNode(N1.getOperand(1), 8, UndefElts1, EltBits1,
                                     false, false))
    return SDValue();

  for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
    // Undef bits could be anything, so the masks can't be proven inverse.
    if (UndefElts0[i] || UndefElts1[i])
      return SDValue();
    // The two masks must be exact bitwise complements of each other.
    if (EltBits0[i] != ~EltBits1[i])
      return SDValue();
  }

  SDLoc DL(N);

  if (UseVPTERNLOG) {
    // Emit a VPTERNLOG select: imm 0xCA computes (A & B) | (~A & C), i.e.
    // A selects between B and C bitwise.
    SDValue A = DAG.getBitcast(VT, N0.getOperand(1));
    SDValue B = DAG.getBitcast(VT, N0.getOperand(0));
    SDValue C = DAG.getBitcast(VT, N1.getOperand(0));
    SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
    return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm);
  }

  // Canonicalize to or(and(X, C), andnp(C, Y)) so one constant is reused.
  SDValue X = N->getOperand(0);
  SDValue Y =
      DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
                  DAG.getBitcast(VT, N1.getOperand(0)));
  return DAG.getNode(ISD::OR, DL, VT, X, Y);
}
45706 | |
45707 | |
45708 | static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) { |
45709 | if (N->getOpcode() != ISD::OR) |
45710 | return false; |
45711 | |
45712 | SDValue N0 = N->getOperand(0); |
45713 | SDValue N1 = N->getOperand(1); |
45714 | |
45715 | |
45716 | if (N1.getOpcode() == ISD::AND) |
45717 | std::swap(N0, N1); |
45718 | |
45719 | |
45720 | if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP) |
45721 | return false; |
45722 | |
45723 | Mask = N1.getOperand(0); |
45724 | X = N1.getOperand(1); |
45725 | |
45726 | |
45727 | if (N0.getOperand(0) == Mask) |
45728 | Y = N0.getOperand(1); |
45729 | else if (N0.getOperand(1) == Mask) |
45730 | Y = N0.getOperand(0); |
45731 | else |
45732 | return false; |
45733 | |
45734 | |
45735 | |
45736 | return true; |
45737 | } |
45738 | |
45739 | |
45740 | |
45741 | |
45742 | |
45743 | |
45744 | |
45745 | |
45746 | |
/// Try to turn an OR-of-AND/ANDNP logic blend into a vector select
/// (PBLENDVB-style) or a conditional negate, when the mask is known to be
/// all-sign-bits per element.
static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
                                            const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");

  // Only 128-bit SSE2 and 256-bit AVX2 integer vectors are handled.
  EVT VT = N->getValueType(0);
  if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||
        (VT.is256BitVector() && Subtarget.hasInt256())))
    return SDValue();

  SDValue X, Y, Mask;
  if (!matchLogicBlend(N, X, Y, Mask))
    return SDValue();

  // Look through any bitcasts around the blend components.
  Mask = peekThroughBitcasts(Mask);
  X = peekThroughBitcasts(X);
  Y = peekThroughBitcasts(Y);

  EVT MaskVT = Mask.getValueType();
  unsigned EltBits = MaskVT.getScalarSizeInBits();

  // The blend is only valid if each mask element is all-ones or all-zeros,
  // i.e. every bit of an element equals its sign bit.
  if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits)
    return SDValue();

  SDLoc DL(N);

  // Attempt to combine to a conditional negate pattern first.
  if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,
                                                           DAG, Subtarget))
    return Res;

  // PBLENDVB is only available on SSE4.1.
  if (!Subtarget.hasSSE41())
    return SDValue();

  // With AVX512VL the masked-select forms are preferable; don't convert.
  if (Subtarget.hasVLX())
    return SDValue();

  MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;

  // Emit the select on byte vectors; the mask sign bits per byte are correct
  // because every element is uniformly all-ones or all-zeros.
  X = DAG.getBitcast(BlendVT, X);
  Y = DAG.getBitcast(BlendVT, Y);
  Mask = DAG.getBitcast(BlendVT, Mask);
  Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X);
  return DAG.getBitcast(VT, Mask);
}
45795 | |
45796 | |
45797 | |
45798 | |
45799 | |
45800 | |
45801 | |
45802 | static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, |
45803 | SelectionDAG &DAG) { |
45804 | SDValue Cmp = Op.getOperand(1); |
45805 | EVT VT = Cmp.getOperand(0).getValueType(); |
45806 | unsigned Log2b = Log2_32(VT.getSizeInBits()); |
45807 | SDLoc dl(Op); |
45808 | SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0)); |
45809 | |
45810 | |
45811 | SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); |
45812 | SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, |
45813 | DAG.getConstant(Log2b, dl, MVT::i8)); |
45814 | return DAG.getZExtOrTrunc(Scc, dl, ExtTy); |
45815 | } |
45816 | |
45817 | |
45818 | |
45819 | |
45820 | |
45821 | |
45822 | |
45823 | |
/// Combine a tree of OR'd (setcc eq (cmp X, 0)) nodes into OR'd
/// ctlz+srl sequences (see lowerX86CmpEqZeroToCtlzSrl), which avoids
/// materializing flags for each compare. Only profitable when CTLZ (LZCNT)
/// is fast on the subtarget.
static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const X86Subtarget &Subtarget) {
  if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())
    return SDValue();

  auto isORCandidate = [](SDValue N) {
    return (N->getOpcode() == ISD::OR && N->hasOneUse());
  };

  // The root must be a single-use value at least i32 wide whose first operand
  // is itself an OR; this guarantees a tree of at least two setcc leaves.
  if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
      !isORCandidate(N->getOperand(0)))
    return SDValue();

  // A leaf is SETCC(COND_E, CMP(X, 0)) with X at least i32 wide.
  auto isSetCCCandidate = [](SDValue N) {
    return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
           X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
           N->getOperand(1).getOpcode() == X86ISD::CMP &&
           isNullConstant(N->getOperand(1).getOperand(1)) &&
           N->getOperand(1).getValueType().bitsGE(MVT::i32);
  };

  SDNode *OR = N->getOperand(0).getNode();
  SDValue LHS = OR->getOperand(0);
  SDValue RHS = OR->getOperand(1);

  // Walk down a left- or right-leaning OR chain where one side is a setcc
  // leaf and the other side continues the chain; remember the visited ORs so
  // their leaves can be rebuilt afterwards.
  SmallVector<SDNode *, 2> ORNodes;
  while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
          (isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
    ORNodes.push_back(OR);
    OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();
    LHS = OR->getOperand(0);
    RHS = OR->getOperand(1);
  }

  // The innermost OR must have two setcc leaves.
  if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
      !isORCandidate(SDValue(OR, 0)))
    return SDValue();

  // Rebuild the innermost OR from the two converted leaves. Note that
  // lowerX86CmpEqZeroToCtlzSrl unconditionally returns a node, so NewLHS and
  // NewRHS are expected to be valid here; the checks mirror the structure of
  // the loop below.
  EVT VT = OR->getValueType(0);
  SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
  SDValue Ret, NewRHS;
  if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);

  if (!Ret)
    return SDValue();

  // Walk back up the recorded OR chain, converting the setcc leaf at each
  // level and OR-ing it into the accumulated result.
  while (ORNodes.size() > 0) {
    OR = ORNodes.pop_back_val();
    LHS = OR->getOperand(0);
    RHS = OR->getOperand(1);
    // Keep the continuing OR chain on the LHS so RHS is the setcc leaf.
    if (RHS->getOpcode() == ISD::OR)
      std::swap(LHS, RHS);
    NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
    if (!NewRHS)
      return SDValue();
    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
  }

  // Widen the combined result back to the root node's type.
  if (Ret)
    Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);

  return Ret;
}
45902 | |
/// Combine an ISD::OR node: try a sequence of target-specific folds and
/// return the replacement value, or SDValue() if no fold applies.
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
                         TargetLowering::DAGCombinerInfo &DCI,
                         const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // If this is SSE1 only (no SSE2), convert v4i32 OR to FOR on v4f32 to
  // avoid scalarization - only f32 vectors are legal on SSE1.
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
    return DAG.getBitcast(MVT::v4i32,
                          DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
                                      DAG.getBitcast(MVT::v4f32, N0),
                                      DAG.getBitcast(MVT::v4f32, N1)));
  }

  // Match an any-of bool scalar reduction (or of i1 extracts from one
  // vector) into a bitcast/MOVMSK of the mask followed by a compare against
  // zero.
  if (VT == MVT::i1) {
    SmallVector<SDValue, 2> SrcOps;
    SmallVector<APInt, 2> SrcPartials;
    if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
        SrcOps.size() == 1) {
      SDLoc dl(N);
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
      EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
      // If the movmsk-style fold failed but the vXi1 source type is legal,
      // a plain bitcast to the integer mask type is sufficient.
      if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
        Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
      if (Mask) {
        assert(SrcPartials[0].getBitWidth() == NumElts &&
               "Unexpected partial reduction mask");
        // Any-of: (Mask & PartialBits) != 0.
        SDValue ZeroBits = DAG.getConstant(0, dl, MaskVT);
        SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
        Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
        return DAG.getSetCC(dl, MVT::i1, Mask, ZeroBits, ISD::SETNE);
      }
    }
  }

  if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
    return R;

  // The remaining folds only run after operation legalization.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
    return R;

  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
    return FPLogic;

  if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
    return R;

  if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
    return R;

  // Attempt to match:
  //   or(X, kshiftl(Y, Elts/2)) -> concat_vectors(lower half X, lower half Y)
  // for vXi1 masks, when the upper half of the non-shifted operand is known
  // zero. Only done for NumElts >= 16 where the kshift form is used.
  if (N0.getOpcode() == X86ISD::KSHIFTL || N1.getOpcode() == X86ISD::KSHIFTL) {
    unsigned NumElts = VT.getVectorNumElements();
    unsigned HalfElts = NumElts / 2;
    APInt UpperElts = APInt::getHighBitsSet(NumElts, HalfElts);
    if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL &&
        N1.getConstantOperandAPInt(1) == HalfElts &&
        DAG.MaskedValueIsZero(N0, APInt(1, 1), UpperElts)) {
      SDLoc dl(N);
      return DAG.getNode(
          ISD::CONCAT_VECTORS, dl, VT,
          extractSubVector(N0, 0, DAG, dl, HalfElts),
          extractSubVector(N1.getOperand(0), 0, DAG, dl, HalfElts));
    }
    // Commuted form: the kshiftl is on operand 0.
    if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL &&
        N0.getConstantOperandAPInt(1) == HalfElts &&
        DAG.MaskedValueIsZero(N1, APInt(1, 1), UpperElts)) {
      SDLoc dl(N);
      return DAG.getNode(
          ISD::CONCAT_VECTORS, dl, VT,
          extractSubVector(N1, 0, DAG, dl, HalfElts),
          extractSubVector(N0.getOperand(0), 0, DAG, dl, HalfElts));
    }
  }

  // Attempt to recursively combine an OR of shuffles.
  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
    SDValue Op(N, 0);
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;
  }

  return SDValue();
}
45998 | |
45999 | |
46000 | |
46001 | |
46002 | |
/// Fold a scalar sign-bit test of the form:
///   xor (trunc (srl X, BitWidth(X)-1)), 1
/// into a sign comparison:
///   setgt X, -1
/// i.e. "sign bit is clear" becomes "X > -1", which selects better on x86.
static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
  // Only fold for the narrow result types where this is profitable.
  EVT ResultType = N->getValueType(0);
  if (ResultType != MVT::i8 && ResultType != MVT::i1)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Operand 0 must be a single-use truncate.
  if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
    return SDValue();

  // The XOR must be with constant 1 (flipping the extracted sign bit).
  if (!isOneConstant(N1))
    return SDValue();

  // The truncate's input must be a single-use logical right shift.
  SDValue Shift = N0.getOperand(0);
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse())
    return SDValue();

  // Restrict to legal scalar shift widths.
  EVT ShiftTy = Shift.getValueType();
  if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
    return SDValue();

  // The shift amount must be exactly BitWidth-1 so only the sign bit remains.
  if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
      Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1))
    return SDValue();

  // Build setgt(ShiftOp, -1) in the target's setcc result type, then
  // zero-extend if the setcc type differs from the requested result type.
  SDLoc DL(N);
  SDValue ShiftOp = Shift.getOperand(0);
  EVT ShiftOpTy = ShiftOp.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(),
                                               *DAG.getContext(), ResultType);
  SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp,
                              DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);
  if (SetCCResultType != ResultType)
    Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, ResultType, Cond);
  return Cond;
}
46050 | |
46051 | |
46052 | |
46053 | |
46054 | |
46055 | |
46056 | |
46057 | |
/// Fold a vector "not sign bit" test:
///   xor (sra X, EltBits-1), -1
/// into:
///   setgt X, -1  (lowers to PCMPGT)
/// The SRA of a value by EltBits-1 splats each element's sign bit; xoring
/// with all-ones inverts it, which is exactly "element is non-negative".
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  if (!VT.isSimple())
    return SDValue();

  // Only the integer vector types with a native PCMPGT are handled:
  // 128-bit needs SSE2, 256-bit needs AVX2.
  switch (VT.getSimpleVT().SimpleTy) {
  default: return SDValue();
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break;
  case MVT::v32i8:
  case MVT::v16i16:
  case MVT::v8i32:
  case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break;
  }

  // Operand 0 must be a single-use arithmetic shift; operand 1 must be an
  // all-ones build vector.
  SDValue Shift = N->getOperand(0);
  SDValue Ones = N->getOperand(1);
  if (Shift.getOpcode() != ISD::SRA || !Shift.hasOneUse() ||
      !ISD::isBuildVectorAllOnes(Ones.getNode()))
    return SDValue();

  // The shift amount must be a (splat) constant equal to EltBits-1 so the
  // SRA yields a pure sign-bit splat. AllowUndefs=true on the splat match.
  auto *ShiftAmt =
      isConstOrConstSplat(Shift.getOperand(1), true);
  if (!ShiftAmt ||
      ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1))
    return SDValue();

  // "Inverted sign splat" == "element > -1"; Ones conveniently reuses the
  // existing all-ones constant as the -1 comparison operand.
  return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT);
}
46095 | |
46096 | |
46097 | |
46098 | |
46099 | |
46100 | |
46101 | |
46102 | |
46103 | |
46104 | |
46105 | |
46106 | |
46107 | |
46108 | |
46109 | |
46110 | |
46111 | |
/// Detect patterns of truncation with unsigned saturation, i.e. clamping of
/// In to [0, unsigned_max(VT element)] before truncating to VT. On success
/// returns the value to feed to an unsigned-saturating truncate.
static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                 const SDLoc &DL) {
  EVT InVT = In.getValueType();

  // Saturation with truncation - the source must be strictly wider.
  assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
         "Unexpected types for truncate operation");

  // Match a min/max node with a splat-constant RHS; on success return the
  // non-constant operand and record the constant in Limit.
  auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
    if (V.getOpcode() == Opcode &&
        ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
      return V.getOperand(0);
    return SDValue();
  };

  APInt C1, C2;
  // Pattern 1: (umin x, unsigned_max) - the clamp is already unsigned, so
  // the umin operand can feed a VTRUNCUS directly.
  if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2))
    if (C2.isMask(VT.getScalarSizeInBits()))
      return UMin;

  // Pattern 2: (smin (smax x, C1), C2) with C1 >= 0 and C2 == unsigned_max.
  // The value is clamped to [C1, C2] which lies inside [0, unsigned_max].
  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
    if (MatchMinMax(SMin, ISD::SMAX, C1))
      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
        return SMin;

  // Pattern 3: (smax (smin x, C2), C1) with 0 <= C1 <= C2 == unsigned_max.
  // Reassociate to (smax x, C1): the upper clamp at C2 is implied by the
  // unsigned-saturating truncate itself.
  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
          C2.uge(C1)) {
        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
      }

  return SDValue();
}
46149 | |
46150 | |
46151 | |
46152 | |
46153 | |
46154 | |
46155 | |
46156 | |
46157 | |
46158 | |
/// Detect patterns of truncation with signed saturation:
///   (truncate (smin (smax x, signed_min), signed_max))
/// or the commuted
///   (truncate (smax (smin x, signed_max), signed_min))
/// With MatchPackUS set, match the PACKUS range instead: clamp to
/// [0, unsigned_max(VT element)] using signed min/max nodes.
/// Returns the unclamped source value on success.
static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
  unsigned NumDstBits = VT.getScalarSizeInBits();
  unsigned NumSrcBits = In.getScalarValueSizeInBits();
  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");

  // Match a min/max node whose RHS is a splat constant equal to Limit and
  // return its LHS.
  auto MatchMinMax = [](SDValue V, unsigned Opcode,
                        const APInt &Limit) -> SDValue {
    APInt C;
    if (V.getOpcode() == Opcode &&
        ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
      return V.getOperand(0);
    return SDValue();
  };

  // The clamp bounds, widened to the source element size.
  APInt SignedMax, SignedMin;
  if (MatchPackUS) {
    // PACKUS range: [0, 2^NumDstBits - 1].
    SignedMax = APInt::getAllOnesValue(NumDstBits).zext(NumSrcBits);
    SignedMin = APInt(NumSrcBits, 0);
  } else {
    // Signed saturation range: [-(2^(NumDstBits-1)), 2^(NumDstBits-1) - 1].
    SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
    SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
  }

  // smin(smax(x, lo), hi) ordering.
  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax))
    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin))
      return SMax;

  // smax(smin(x, hi), lo) ordering.
  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin))
    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax))
      return SMin;

  return SDValue();
}
46192 | |
/// Try to lower a saturating truncate of In to VT using PACKSS/PACKUS or the
/// AVX512 VTRUNCS/VTRUNCUS nodes, depending on the subtarget and types.
/// Returns SDValue() if no saturating-truncate form applies.
static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
                                      SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  if (!Subtarget.hasSSE2() || !VT.isVector())
    return SDValue();

  EVT SVT = VT.getVectorElementType();
  EVT InVT = In.getValueType();
  EVT InSVT = InVT.getVectorElementType();

  // Special case: v16i32 -> v16i8 on AVX512BW without 512-bit registers.
  // PACKUS the i32->i16 step to stay in 256-bit ops, then finish with a
  // VTRUNCUS for i16->i8 (the packed value is already in unsigned range).
  if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      InVT == MVT::v16i32 && VT == MVT::v16i8) {
    if (auto USatVal = detectSSatPattern(In, VT, true)) {
      SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
                                           DL, DAG, Subtarget);
      assert(Mid && "Failed to pack!");
      return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid);
    }
  }

  // Decide whether the AVX512 VTRUNCS/VTRUNCUS path below is preferable to
  // the PACKSS/PACKUS path: the source must be wider than 128 bits, VLX (or
  // >256-bit input) must be available, and we avoid producing >=256-bit
  // results when 512-bit registers are disallowed.
  bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
                       (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
                      (InVT.getSizeInBits() > 128) &&
                      (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
                      !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);

  // PACKSS/PACKUS path: pow2 element count, i16/i32 -> i8/i16 truncations.
  if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
      VT.getSizeInBits() >= 64 &&
      (SVT == MVT::i8 || SVT == MVT::i16) &&
      (InSVT == MVT::i16 || InSVT == MVT::i32)) {
    if (auto USatVal = detectSSatPattern(In, VT, true)) {
      // vXi32 -> vXi8 must be done in two stages: PACKSS to i16 (keeps the
      // signed range intact) then PACKUS to i8.
      if (SVT == MVT::i8 && InSVT == MVT::i32) {
        EVT MidVT = VT.changeVectorElementType(MVT::i16);
        SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
                                             DAG, Subtarget);
        assert(Mid && "Failed to pack!");
        SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
                                           Subtarget);
        assert(V && "Failed to pack!");
        return V;
      } else if (SVT == MVT::i8 || Subtarget.hasSSE41())
        // PACKUSWB is SSE2, but PACKUSDW (i32->i16) needs SSE4.1.
        return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,
                                      Subtarget);
    }
    if (auto SSatVal = detectSSatPattern(In, VT))
      return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,
                                    Subtarget);
  }

  // AVX512 VTRUNCS/VTRUNCUS path for legal vector inputs; i16 elements need
  // BWI, and vXi1 results are excluded.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 &&
      Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI()) &&
      (SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) {
    unsigned TruncOpc = 0;
    SDValue SatVal;
    if (auto SSatVal = detectSSatPattern(In, VT)) {
      SatVal = SSatVal;
      TruncOpc = X86ISD::VTRUNCS;
    } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) {
      SatVal = USatVal;
      TruncOpc = X86ISD::VTRUNCUS;
    }
    if (SatVal) {
      unsigned ResElts = VT.getVectorNumElements();

      // Without VLX we only have the 512-bit truncate instructions, so widen
      // sub-512-bit inputs to 512 bits with undef.
      if (!Subtarget.hasVLX() && !InVT.is512BitVector()) {
        unsigned NumConcats = 512 / InVT.getSizeInBits();
        ResElts *= NumConcats;
        SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT));
        ConcatOps[0] = SatVal;
        InVT = EVT::getVectorVT(*DAG.getContext(), InSVT,
                                NumConcats * InVT.getVectorNumElements());
        SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps);
      }

      // Widen the truncate result to at least 128 bits, then extract the
      // requested subvector.
      if (ResElts * SVT.getSizeInBits() < 128)
        ResElts = 128 / SVT.getSizeInBits();
      EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts);
      SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                         DAG.getIntPtrConstant(0, DL));
    }
  }

  return SDValue();
}
46294 | |
46295 | |
46296 | |
46297 | |
/// Detect an averaging pattern being truncated to VT:
///   trunc (srl (add a, b (+1)), 1)
/// where a/b are zero-extended (or known non-overlapping OR'd) values of VT,
/// and lower it to X86ISD::AVG (PAVGB/PAVGW), which computes
/// (a + b + 1) >> 1 without intermediate widening.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget,
                                const SDLoc &DL) {
  if (!VT.isVector())
    return SDValue();
  EVT InVT = In.getValueType();
  unsigned NumElems = VT.getVectorNumElements();

  // PAVG only exists for i8/i16 elements.
  EVT ScalarVT = VT.getVectorElementType();
  if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2))
    return SDValue();

  // The input must be genuinely wider than the result, i.e. a real
  // truncation of the widened arithmetic.
  EVT InScalarVT = InVT.getVectorElementType();
  if (InScalarVT.getFixedSizeInBits() <= ScalarVT.getFixedSizeInBits())
    return SDValue();

  if (!Subtarget.hasSSE2())
    return SDValue();

  // The pattern root (before the truncate) must be the >>1.
  if (In.getOpcode() != ISD::SRL)
    return SDValue();

  // Helper: every constant element of V is within [Min, Max].
  auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
    return ISD::matchUnaryPredicate(V, [Min, Max](ConstantSDNode *C) {
      return !(C->getAPIntValue().ult(Min) || C->getAPIntValue().ugt(Max));
    });
  };

  // Check the srl is by exactly 1 and that its operand is an add.
  SDValue LHS = In.getOperand(0);
  SDValue RHS = In.getOperand(1);
  if (!IsConstVectorInRange(RHS, 1, 1))
    return SDValue();
  if (LHS.getOpcode() != ISD::ADD)
    return SDValue();

  // Detect a pattern of a + b + 1 where the order doesn't matter.
  SDValue Operands[3];
  Operands[0] = LHS.getOperand(0);
  Operands[1] = LHS.getOperand(1);

  auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                       ArrayRef<SDValue> Ops) {
    return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
  };

  // Emit the AVG, padding a non-pow2 element count up to the next power of
  // two with undef elements, splitting oversized types, and extracting the
  // original subvector back out afterwards.
  auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
    unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
    EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
    if (NumElemsPow2 != NumElems) {
      SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
      SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
      for (unsigned i = 0; i != NumElems; ++i) {
        SDValue Idx = DAG.getIntPtrConstant(i, DL);
        Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
        Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
      }
      Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
      Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
    }
    SDValue Res =
        SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
    if (NumElemsPow2 == NumElems)
      return Res;
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                       DAG.getIntPtrConstant(0, DL));
  };

  // Take care of the case when one of the operands is a constant vector whose
  // element is in the range [1, 256]: avg(x, C) == avg(x, C-1 + 1), so fold
  // the +1 into the constant by subtracting 1 and truncating.
  if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
      Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
      Operands[0].getOperand(0).getValueType() == VT) {
    SDValue VecOnes = DAG.getConstant(1, DL, InVT);
    Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
    Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
    return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
  }

  // Matches 'add'-like patterns: a plain ADD, or zext(or(a, b)) where a and b
  // share no set bits (so the OR behaves as an ADD).
  auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
    if (ISD::ADD == V.getOpcode()) {
      Op0 = V.getOperand(0);
      Op1 = V.getOperand(1);
      return true;
    }
    if (ISD::ZERO_EXTEND != V.getOpcode())
      return false;
    V = V.getOperand(0);
    if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
        !DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
      return false;
    Op0 = V.getOperand(0);
    Op1 = V.getOperand(1);
    return true;
  };

  // One of the two add operands must itself be an add-like node carrying the
  // two averaged values; the other operand is the +1 constant.
  SDValue Op0, Op1;
  if (FindAddLike(Operands[0], Op0, Op1))
    std::swap(Operands[0], Operands[1]);
  else if (!FindAddLike(Operands[1], Op0, Op1))
    return SDValue();
  Operands[2] = Op0;
  Operands[1] = Op1;

  // Now we have three operands of two additions. Check that one of them is a
  // constant vector with ones, and the other two can be promoted from narrow
  // values of type VT.
  for (int i = 0; i < 3; ++i) {
    if (!IsConstVectorInRange(Operands[i], 1, 1))
      continue;
    std::swap(Operands[i], Operands[2]);

    // Check if Operands[0] and Operands[1] are results of type promotion.
    for (int j = 0; j < 2; ++j)
      if (Operands[j].getValueType() != VT) {
        if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
            Operands[j].getOperand(0).getValueType() != VT)
          return SDValue();
        Operands[j] = Operands[j].getOperand(0);
      }

    // The pattern is detected; emit X86ISD::AVG on the narrow type.
    return AVGSplitter(Operands[0], Operands[1]);
  }

  return SDValue();
}
46443 | |
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
  LoadSDNode *Ld = cast<LoadSDNode>(N);
  EVT RegVT = Ld->getValueType(0);
  EVT MemVT = Ld->getMemoryVT();
  SDLoc dl(Ld);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // For chips with slow 32-byte unaligned loads, break the 32-byte operation
  // into two 16-byte operations. Also split non-temporal aligned loads on
  // pre-AVX2 targets (no 256-bit integer path without Int256).
  ISD::LoadExtType Ext = Ld->getExtensionType();
  bool Fast;
  if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
      Ext == ISD::NON_EXTLOAD &&
      ((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
        Ld->getAlignment() >= 16) ||
       (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
                               *Ld->getMemOperand(), &Fast) &&
        !Fast))) {
    unsigned NumElems = RegVT.getVectorNumElements();
    if (NumElems < 2)
      return SDValue();

    // Second half starts 16 bytes (128 bits) into the original access.
    unsigned HalfOffset = 16;
    SDValue Ptr1 = Ld->getBasePtr();
    SDValue Ptr2 =
        DAG.getMemBasePlusOffset(Ptr1, TypeSize::Fixed(HalfOffset), dl);
    EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
                                  NumElems / 2);
    SDValue Load1 =
        DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
                    Ld->getOriginalAlign(),
                    Ld->getMemOperand()->getFlags());
    SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,
                                Ld->getPointerInfo().getWithOffset(HalfOffset),
                                Ld->getOriginalAlign(),
                                Ld->getMemOperand()->getFlags());
    // Merge the two load chains so later memory ops depend on both halves.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                             Load1.getValue(1), Load2.getValue(1));

    SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
    return DCI.CombineTo(N, NewVec, TF, true);
  }

  // Bool vector load - attempt to cast to an integer, as we have good
  // combines on integer types. Only done pre-AVX512 (where vXi1 isn't a
  // legal register type) and only when the matching iN type is legal.
  if (Ext == ISD::NON_EXTLOAD && !Subtarget.hasAVX512() && RegVT.isVector() &&
      RegVT.getScalarType() == MVT::i1 && DCI.isBeforeLegalize()) {
    unsigned NumElts = RegVT.getVectorNumElements();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
    if (TLI.isTypeLegal(IntVT)) {
      SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
                                    Ld->getPointerInfo(),
                                    Ld->getOriginalAlign(),
                                    Ld->getMemOperand()->getFlags());
      SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
      return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true);
    }
  }

  // If this same location is also consumed by a wider SUBV_BROADCAST_LOAD
  // (same pointer, chain, and memory width), reuse that broadcast and just
  // extract the low subvector instead of performing a second load.
  if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
      (RegVT.is128BitVector() || RegVT.is256BitVector())) {
    SDValue Ptr = Ld->getBasePtr();
    SDValue Chain = Ld->getChain();
    for (SDNode *User : Ptr->uses()) {
      if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
          cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
          cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
          cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
              MemVT.getSizeInBits() &&
          !User->hasAnyUseOfValue(1) &&
          User->getValueSizeInBits(0).getFixedSize() >
              RegVT.getFixedSizeInBits()) {
        SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
                                           RegVT.getSizeInBits());
        Extract = DAG.getBitcast(RegVT, Extract);
        return DCI.CombineTo(N, Extract, SDValue(User, 1));
      }
    }
  }

  // Cast ptr32/ptr64 pointers to the default address space before a load so
  // the base pointer matches the target's pointer type.
  unsigned AddrSpace = Ld->getAddressSpace();
  if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
      AddrSpace == X86AS::PTR32_UPTR) {
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    if (PtrVT != Ld->getBasePtr().getSimpleValueType()) {
      SDValue Cast =
          DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0);
      return DAG.getLoad(RegVT, dl, Ld->getChain(), Cast, Ld->getPointerInfo(),
                         Ld->getOriginalAlign(),
                         Ld->getMemOperand()->getFlags());
    }
  }

  return SDValue();
}
46545 | |
46546 | |
46547 | |
46548 | |
46549 | static int getOneTrueElt(SDValue V) { |
46550 | |
46551 | |
46552 | |
46553 | |
46554 | |
46555 | |
46556 | |
46557 | auto *BV = dyn_cast<BuildVectorSDNode>(V); |
46558 | if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1) |
46559 | return -1; |
46560 | |
46561 | int TrueIndex = -1; |
46562 | unsigned NumElts = BV->getValueType(0).getVectorNumElements(); |
46563 | for (unsigned i = 0; i < NumElts; ++i) { |
46564 | const SDValue &Op = BV->getOperand(i); |
46565 | if (Op.isUndef()) |
46566 | continue; |
46567 | auto *ConstNode = dyn_cast<ConstantSDNode>(Op); |
46568 | if (!ConstNode) |
46569 | return -1; |
46570 | if (ConstNode->getAPIntValue().countTrailingOnes() >= 1) { |
46571 | |
46572 | if (TrueIndex >= 0) |
46573 | return -1; |
46574 | TrueIndex = i; |
46575 | } |
46576 | } |
46577 | return TrueIndex; |
46578 | } |
46579 | |
46580 | |
46581 | |
46582 | |
46583 | |
46584 | static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, |
46585 | SelectionDAG &DAG, SDValue &Addr, |
46586 | SDValue &Index, Align &Alignment, |
46587 | unsigned &Offset) { |
46588 | int TrueMaskElt = getOneTrueElt(MaskedOp->getMask()); |
46589 | if (TrueMaskElt < 0) |
46590 | return false; |
46591 | |
46592 | |
46593 | |
46594 | EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType(); |
46595 | Offset = 0; |
46596 | Addr = MaskedOp->getBasePtr(); |
46597 | if (TrueMaskElt != 0) { |
46598 | Offset = TrueMaskElt * EltVT.getStoreSize(); |
46599 | Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset), |
46600 | SDLoc(MaskedOp)); |
46601 | } |
46602 | |
46603 | Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp)); |
46604 | Alignment = commonAlignment(MaskedOp->getOriginalAlign(), |
46605 | EltVT.getStoreSize()); |
46606 | return true; |
46607 | } |
46608 | |
46609 | |
46610 | |
46611 | |
46612 | |
// If a masked load's mask has exactly one true element, replace it with a
// scalar load of that one element plus an insert into the pass-through value.
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
  assert(ML->isUnindexed() && "Unexpected indexed masked load!");

  // Compute the scalar address / element index / alignment for the single
  // live lane; bail out if the mask is not one-hot.
  SDValue Addr, VecIndex;
  Align Alignment;
  unsigned Offset;
  if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset))
    return SDValue();

  SDLoc DL(ML);
  EVT VT = ML->getValueType(0);
  EVT EltVT = VT.getVectorElementType();

  // On 32-bit targets an i64 scalar load isn't legal, so load/insert through
  // the equivalent f64 vector type instead and bitcast back at the end.
  EVT CastVT = VT;
  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
    EltVT = MVT::f64;
    CastVT = VT.changeVectorElementType(EltVT);
  }

  // Load the one element the mask selects.
  SDValue Load =
      DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
                  ML->getPointerInfo().getWithOffset(Offset),
                  Alignment, ML->getMemOperand()->getFlags());

  SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());

  // Insert the loaded element into the pass-through vector at the masked
  // lane, then restore the original type.
  SDValue Insert =
      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, CastVT, PassThru, Load, VecIndex);
  Insert = DAG.getBitcast(VT, Insert);
  // Replace both the value and the chain result of the masked load.
  return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
46653 | |
// Simplify a masked load whose mask is a build_vector of constants: either
// convert it to an unmasked load + select, or strip a non-trivial
// pass-through so the load itself is simpler.
static SDValue
combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
                              TargetLowering::DAGCombinerInfo &DCI) {
  assert(ML->isUnindexed() && "Unexpected indexed masked load!");
  if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))
    return SDValue();

  SDLoc DL(ML);
  EVT VT = ML->getValueType(0);

  // If the first and last elements are both loaded, the whole memory range
  // of the vector is accessed anyway, so a full (unmasked) vector load
  // followed by a blend with the pass-through is safe and cheaper.
  unsigned NumElts = VT.getVectorNumElements();
  BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());
  bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0));
  bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1));
  if (LoadFirstElt && LoadLastElt) {
    SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
                                ML->getMemOperand());
    SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
                                  ML->getPassThru());
    return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
  }

  // Otherwise keep the masked load, but pull a non-trivial pass-through out
  // into a separate select. Nothing to do if the pass-through is already
  // undef or all-zeros (those forms are already the simple ones).
  if (ML->getPassThru().isUndef())
    return SDValue();

  if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode()))
    return SDValue();

  // Rebuild the masked load with an undef pass-through and blend the
  // original pass-through in afterwards.
  SDValue NewML = DAG.getMaskedLoad(
      VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),
      DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),
      ML->getAddressingMode(), ML->getExtensionType());
  SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
                                ML->getPassThru());

  return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
46702 | |
static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  auto *Mld = cast<MaskedLoadSDNode>(N);

  // Expanding loads change element placement; none of the rewrites below
  // are valid for them.
  if (Mld->isExpandingLoad())
    return SDValue();

  if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
    // One-hot mask: turn the whole thing into a scalar load + insert.
    if (SDValue ScalarLoad =
            reduceMaskedLoadToScalarLoad(Mld, DAG, DCI, Subtarget))
      return ScalarLoad;

    // Constant masks are only worth rewriting pre-AVX512 (with AVX512 the
    // masked form is natively cheap).
    if (!Subtarget.hasAVX512())
      if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
        return Blend;
  }

  // With vector masks (element size > 1 bit) only the sign bit of each mask
  // element matters, so try to simplify the mask to just those bits.
  SDValue Mask = Mld->getMask();
  if (Mask.getScalarValueSizeInBits() != 1) {
    EVT VT = Mld->getValueType(0);
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
      // SimplifyDemandedBits changed the DAG in place; requeue this node
      // (unless the simplification deleted it) and return it unchanged so
      // the combiner revisits it.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    // Or build a simpler multi-use mask and re-emit the masked load with it.
    if (SDValue NewMask =
            TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
      return DAG.getMaskedLoad(
          VT, SDLoc(N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(),
          NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(),
          Mld->getAddressingMode(), Mld->getExtensionType());
  }

  return SDValue();
}
46745 | |
46746 | |
46747 | |
46748 | |
46749 | |
46750 | static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, |
46751 | SelectionDAG &DAG, |
46752 | const X86Subtarget &Subtarget) { |
46753 | |
46754 | |
46755 | |
46756 | |
46757 | SDValue Addr, VecIndex; |
46758 | Align Alignment; |
46759 | unsigned Offset; |
46760 | if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset)) |
46761 | return SDValue(); |
46762 | |
46763 | |
46764 | SDLoc DL(MS); |
46765 | SDValue Value = MS->getValue(); |
46766 | EVT VT = Value.getValueType(); |
46767 | EVT EltVT = VT.getVectorElementType(); |
46768 | if (EltVT == MVT::i64 && !Subtarget.is64Bit()) { |
46769 | EltVT = MVT::f64; |
46770 | EVT CastVT = VT.changeVectorElementType(EltVT); |
46771 | Value = DAG.getBitcast(CastVT, Value); |
46772 | } |
46773 | SDValue Extract = |
46774 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Value, VecIndex); |
46775 | |
46776 | |
46777 | return DAG.getStore(MS->getChain(), DL, Extract, Addr, |
46778 | MS->getPointerInfo().getWithOffset(Offset), |
46779 | Alignment, MS->getMemOperand()->getFlags()); |
46780 | } |
46781 | |
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget &Subtarget) {
  MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
  // Compressing stores repack elements; none of the rewrites below apply.
  if (Mst->isCompressingStore())
    return SDValue();

  EVT VT = Mst->getValue().getValueType();
  SDLoc dl(Mst);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Truncating masked stores are handled elsewhere.
  if (Mst->isTruncatingStore())
    return SDValue();

  // One-hot mask: turn the whole thing into an extract + scalar store.
  if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
    return ScalarStore;

  // With vector masks (element size > 1 bit) only the sign bit of each mask
  // element matters, so try to simplify the mask to just those bits.
  SDValue Mask = Mst->getMask();
  if (Mask.getScalarValueSizeInBits() != 1) {
    APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
      // The DAG was changed in place; requeue this node (unless deleted) and
      // return it unchanged so the combiner revisits it.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    // Or build a simpler multi-use mask and re-emit the store with it.
    if (SDValue NewMask =
            TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
      return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(),
                                Mst->getBasePtr(), Mst->getOffset(), NewMask,
                                Mst->getMemoryVT(), Mst->getMemOperand(),
                                Mst->getAddressingMode());
  }

  // Fold store(truncate(x)) into a truncating masked store when the target
  // supports that trunc-store type combination and the truncate has no
  // other users.
  SDValue Value = Mst->getValue();
  if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            Mst->getMemoryVT())) {
    return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
                              Mst->getBasePtr(), Mst->getOffset(), Mask,
                              Mst->getMemoryVT(), Mst->getMemOperand(),
                              Mst->getAddressingMode(), true);
  }

  return SDValue();
}
46829 | |
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
  StoreSDNode *St = cast<StoreSDNode>(N);
  EVT StVT = St->getMemoryVT();
  SDLoc dl(St);
  SDValue StoredVal = St->getValue();
  EVT VT = StoredVal.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Pre-AVX512, a vXi1 store is better expressed as a store of the
  // equivalent iN integer (one bit per element).
  if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() &&
      VT.getVectorElementType() == MVT::i1) {

    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
    StoredVal = DAG.getBitcast(NewVT, StoredVal);

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // AVX512: a v1i1 store of scalar_to_vector(i8) is just a store of the i8
  // with everything above bit 0 cleared.
  if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
      StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
      StoredVal.getOperand(0).getValueType() == MVT::i8) {
    SDValue Val = StoredVal.getOperand(0);
    // Only bit 0 of the i1 payload is meaningful; zero the rest.
    Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
    return DAG.getStore(St->getChain(), dl, Val,
                        St->getBasePtr(), St->getPointerInfo(),
                        St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // AVX512: widen tiny mask stores (v1i1/v2i1/v4i1) to v8i1 by padding with
  // zero vectors, since v8i1 is the smallest mask store granule.
  if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
      Subtarget.hasAVX512()) {
    unsigned NumConcats = 8 / VT.getVectorNumElements();
    // Low part is the original value; the rest is zero padding.
    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
    Ops[0] = StoredVal;
    StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // Constant mask vectors can be stored as the equivalent integer constant.
  if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
       VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
      ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
    // 32-bit targets can't store an i64 directly; split a v64i1 constant
    // into two 32-bit integer stores.
    if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
      SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
                                      StoredVal->ops().slice(0, 32));
      Lo = combinevXi1ConstantToInteger(Lo, DAG);
      SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
                                      StoredVal->ops().slice(32, 32));
      Hi = combinevXi1ConstantToInteger(Hi, DAG);

      SDValue Ptr0 = St->getBasePtr();
      // High half lives 4 bytes past the low half.
      SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(4), dl);

      SDValue Ch0 =
          DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
                       St->getOriginalAlign(),
                       St->getMemOperand()->getFlags());
      SDValue Ch1 =
          DAG.getStore(St->getChain(), dl, Hi, Ptr1,
                       St->getPointerInfo().getWithOffset(4),
                       St->getOriginalAlign(),
                       St->getMemOperand()->getFlags());
      // Join the two store chains.
      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
    }

    StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);
    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // If the target reports this 32-byte access as slow, split it into two
  // 16-byte stores.
  bool Fast;
  if (VT.is256BitVector() && StVT == VT &&
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *St->getMemOperand(), &Fast) &&
      !Fast) {
    unsigned NumElems = VT.getVectorNumElements();
    if (NumElems < 2)
      return SDValue();

    return splitVectorStore(St, DAG);
  }

  // Non-temporal stores need full natural alignment for the MOVNT family;
  // when under-aligned, split or scalarize so each piece can still be NT.
  if (St->isNonTemporal() && StVT == VT &&
      St->getAlignment() < VT.getStoreSize()) {

    // Split wide vectors down until the halves are naturally aligned.
    if (VT.is256BitVector() || VT.is512BitVector()) {
      unsigned NumElems = VT.getVectorNumElements();
      if (NumElems < 2)
        return SDValue();
      return splitVectorStore(St, DAG);
    }

    // 128-bit case: pick the widest NT-storable element type available
    // (SSE4A has MOVNTSD for f64; otherwise use i64 or i32 scalars).
    if (VT.is128BitVector() && Subtarget.hasSSE2()) {
      MVT NTVT = Subtarget.hasSSE4A()
                     ? MVT::v2f64
                     : (TLI.isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
      return scalarizeVectorStore(St, NTVT, DAG);
    }
  }

  // Without BWI there is no v16i16->v16i8 trunc-store; widen the truncate
  // source to v16i32, for which the trunc-store is legal.
  if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() &&
      St->getValue().getOpcode() == ISD::TRUNCATE &&
      St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
      TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
      St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
    return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
                             MVT::v16i8, St->getMemOperand());
  }

  // Fold store of a saturating truncate (VTRUNCUS/VTRUNCS) into a saturated
  // truncating store when that trunc-store is legal.
  if (!St->isTruncatingStore() && StoredVal.hasOneUse() &&
      (StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
       StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
      TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
    bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
    return EmitTruncSStore(IsSigned, St->getChain(),
                           dl, StoredVal.getOperand(0), St->getBasePtr(),
                           VT, St->getMemOperand(), DAG);
  }

  // Store of element 0 of a VTRUNC result: store the truncation of the whole
  // source vector instead, when a matching trunc-store is legal and the
  // truncated bits exactly cover the stored width.
  if (!St->isTruncatingStore() && StoredVal.hasOneUse()) {
    // Peel an optional scalar truncate, then match extract of lane 0.
    auto IsExtractedElement = [](SDValue V) {
      if (V.getOpcode() == ISD::TRUNCATE && V.getOperand(0).hasOneUse())
        V = V.getOperand(0);
      unsigned Opc = V.getOpcode();
      if (Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) {
        if (V.getOperand(0).hasOneUse() && isNullConstant(V.getOperand(1)))
          return V.getOperand(0);
      }
      return SDValue();
    };
    if (SDValue Extract = IsExtractedElement(StoredVal)) {
      SDValue Trunc = peekThroughOneUseBitcasts(Extract);
      if (Trunc.getOpcode() == X86ISD::VTRUNC) {
        SDValue Src = Trunc.getOperand(0);
        MVT DstVT = Trunc.getSimpleValueType();
        MVT SrcVT = Src.getSimpleValueType();
        unsigned NumSrcElts = SrcVT.getVectorNumElements();
        unsigned NumTruncBits = DstVT.getScalarSizeInBits() * NumSrcElts;
        MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts);
        if (NumTruncBits == VT.getSizeInBits() &&
            TLI.isTruncStoreLegal(SrcVT, TruncVT)) {
          return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(),
                                   TruncVT, St->getMemOperand());
        }
      }
    }
  }

  // Vector truncating stores: look for averaging / saturation patterns that
  // map to PAVG or saturated trunc-stores.
  if (St->isTruncatingStore() && VT.isVector()) {
    // trunc-store of an average: emit the AVG node and a normal store.
    if (DCI.isBeforeLegalize() || TLI.isTypeLegal(St->getMemoryVT()))
      if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
                                         Subtarget, dl))
        return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
                            St->getPointerInfo(), St->getOriginalAlign(),
                            St->getMemOperand()->getFlags());

    if (TLI.isTruncStoreLegal(VT, StVT)) {
      // Signed-saturation pattern.
      if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))
        return EmitTruncSStore(true /* IsSigned */, St->getChain(),
                               dl, Val, St->getBasePtr(),
                               St->getMemoryVT(), St->getMemOperand(), DAG);
      // Unsigned-saturation pattern.
      if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),
                                          DAG, dl))
        return EmitTruncSStore(false /* IsSigned */, St->getChain(),
                               dl, Val, St->getBasePtr(),
                               St->getMemoryVT(), St->getMemOperand(), DAG);
    }

    return SDValue();
  }

  // Cast ptr32/ptr64 pointers to the default address space before a store so
  // the base pointer matches the target's pointer type.
  unsigned AddrSpace = St->getAddressSpace();
  if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
      AddrSpace == X86AS::PTR32_UPTR) {
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    if (PtrVT != St->getBasePtr().getSimpleValueType()) {
      SDValue Cast =
          DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0);
      return DAG.getStore(St->getChain(), dl, StoredVal, Cast,
                          St->getPointerInfo(), St->getOriginalAlign(),
                          St->getMemOperand()->getFlags(), St->getAAInfo());
    }
  }

  // Everything below is 32-bit-target 64-bit-store rewriting; only 64-bit
  // stored values qualify.
  if (VT.getSizeInBits() != 64)
    return SDValue();

  const Function &F = DAG.getMachineFunction().getFunction();
  bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
  bool F64IsLegal =
      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
  // On a 32-bit target, route an i64 load+store pair through an f64 SSE
  // register so it becomes one 64-bit move instead of two 32-bit ones.
  if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) &&
      isa<LoadSDNode>(St->getValue()) &&
      cast<LoadSDNode>(St->getValue())->isSimple() &&
      St->getChain().hasOneUse() && St->isSimple()) {
    LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());

    // Only plain (non-extending, non-indexed) loads qualify.
    if (!ISD::isNormalLoad(Ld))
      return SDValue();

    // The load's value must feed only this store.
    if (!Ld->hasNUsesOfValue(1, 0))
      return SDValue();

    SDLoc LdDL(Ld);
    SDLoc StDL(N);

    // Reload as f64 and store the f64 value.
    SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(),
                                Ld->getBasePtr(), Ld->getMemOperand());

    // Keep the new load correctly ordered against the old load's users.
    DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
    return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
                        St->getMemOperand());
  }

  // Likewise, an i64 store of an extracted vector element on a 32-bit target
  // is better done as an f64 extract + store (avoids splitting into two
  // 32-bit stores).
  if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
      St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue OldExtract = St->getOperand(1);
    SDValue ExtOp0 = OldExtract.getOperand(0);
    unsigned VecSize = ExtOp0.getValueSizeInBits();
    // Reinterpret the source vector as f64 elements and extract from that.
    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
    SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
    SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                                     BitCast, OldExtract.getOperand(1));
    return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  return SDValue();
}
47105 | |
47106 | static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG, |
47107 | TargetLowering::DAGCombinerInfo &DCI, |
47108 | const X86Subtarget &Subtarget) { |
47109 | auto *St = cast<MemIntrinsicSDNode>(N); |
47110 | |
47111 | SDValue StoredVal = N->getOperand(1); |
47112 | MVT VT = StoredVal.getSimpleValueType(); |
47113 | EVT MemVT = St->getMemoryVT(); |
47114 | |
47115 | |
47116 | unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits(); |
47117 | APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts); |
47118 | |
47119 | APInt KnownUndef, KnownZero; |
47120 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47121 | if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef, |
47122 | KnownZero, DCI)) { |
47123 | if (N->getOpcode() != ISD::DELETED_NODE) |
47124 | DCI.AddToWorklist(N); |
47125 | return SDValue(N, 0); |
47126 | } |
47127 | |
47128 | return SDValue(); |
47129 | } |
47130 | |
47131 | |
47132 | |
47133 | |
47134 | |
47135 | |
47136 | |
47137 | |
47138 | |
47139 | |
47140 | |
47141 | |
47142 | |
47143 | |
// Return true if LHS/RHS (the operands of an add/sub) can be expressed as a
// horizontal operation (HADD/HSUB/FHADD/FHSUB). On success LHS/RHS are
// rewritten (bitcast) to the horizontal op's inputs and PostShuffleMask holds
// any shuffle that must be applied to the horizontal op's result (empty if
// none is needed).
static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
                              SelectionDAG &DAG, const X86Subtarget &Subtarget,
                              bool IsCommutative,
                              SmallVectorImpl<int> &PostShuffleMask) {
  // A horizontal op of two undefs is pointless.
  if (LHS.isUndef() || RHS.isUndef())
    return false;

  MVT VT = LHS.getSimpleValueType();
  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for horizontal add/sub");
  unsigned NumElts = VT.getVectorNumElements();

  // Decompose Op into up to two shuffle sources (N0/N1) and a mask scaled to
  // NumElts elements. If Op is extract_subvector(256-bit, 0), look through it
  // and split the wide source instead. On failure, ShuffleMask is left empty.
  auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
                        SmallVectorImpl<int> &ShuffleMask) {
    bool UseSubVector = false;
    if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Op.getOperand(0).getValueType().is256BitVector() &&
        llvm::isNullConstant(Op.getOperand(1))) {
      Op = Op.getOperand(0);
      UseSubVector = true;
    }
    SmallVector<SDValue, 2> SrcOps;
    SmallVector<int, 16> SrcMask, ScaledMask;
    SDValue BC = peekThroughBitcasts(Op);
    // Only accept shuffles whose inputs are all the same width as Op itself
    // and whose mask has no forced-zero elements.
    if (getTargetShuffleInputs(BC, SrcOps, SrcMask, DAG) &&
        !isAnyZero(SrcMask) && all_of(SrcOps, [BC](SDValue Op) {
          return Op.getValueSizeInBits() == BC.getValueSizeInBits();
        })) {
      resolveTargetShuffleInputsAndMask(SrcOps, SrcMask);
      if (!UseSubVector && SrcOps.size() <= 2 &&
          scaleShuffleElements(SrcMask, NumElts, ScaledMask)) {
        N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
        N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
        ShuffleMask.assign(ScaledMask.begin(), ScaledMask.end());
      }
      if (UseSubVector && SrcOps.size() == 1 &&
          scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask)) {
        // Split the 256-bit source into halves; keep only the low-half mask.
        std::tie(N0, N1) = DAG.SplitVector(SrcOps[0], SDLoc(Op));
        ArrayRef<int> Mask = ArrayRef<int>(ScaledMask).slice(0, NumElts);
        ShuffleMask.assign(Mask.begin(), Mask.end());
      }
    }
  };

  // View LHS as a shuffle of sources A/B with mask LMask.
  SDValue A, B;
  SmallVector<int, 16> LMask;
  GetShuffle(LHS, A, B, LMask);

  // View RHS as a shuffle of sources C/D with mask RMask.
  SDValue C, D;
  SmallVector<int, 16> RMask;
  GetShuffle(RHS, C, D, RMask);

  // At least one operand must actually be a shuffle, or there's nothing
  // "horizontal" about the pattern.
  unsigned NumShuffles = (LMask.empty() ? 0 : 1) + (RMask.empty() ? 0 : 1);
  if (NumShuffles == 0)
    return false;

  // Treat a non-shuffle operand as the identity shuffle of itself.
  if (LMask.empty()) {
    A = LHS;
    for (unsigned i = 0; i != NumElts; ++i)
      LMask.push_back(i);
  }

  if (RMask.empty()) {
    C = RHS;
    for (unsigned i = 0; i != NumElts; ++i)
      RMask.push_back(i);
  }

  // Drop sources a mask never references: indices < NumElts pick from the
  // first source, indices >= NumElts from the second.
  if (isUndefOrInRange(LMask, 0, NumElts))
    B = SDValue();
  else if (isUndefOrInRange(LMask, NumElts, NumElts * 2))
    A = SDValue();

  if (isUndefOrInRange(RMask, 0, NumElts))
    D = SDValue();
  else if (isUndefOrInRange(RMask, NumElts, NumElts * 2))
    C = SDValue();

  // Both operands must draw from the same source pair; try commuting the
  // RHS sources (and its mask) to line them up.
  if (A != C) {
    std::swap(C, D);
    ShuffleVectorSDNode::commuteMask(RMask);
  }

  if (!(A == C && B == D))
    return false;

  PostShuffleMask.clear();
  PostShuffleMask.append(NumElts, SM_SentinelUndef);

  // Verify the element pairing: within each 128-bit lane, the horizontal op
  // consumes adjacent element pairs (LIdx, LIdx+1). Build the post-shuffle
  // mask that maps each produced element back to where the add/sub expected
  // it.
  unsigned Num128BitChunks = VT.getSizeInBits() / 128;
  unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
  unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
  assert((NumEltsPer128BitChunk % 2 == 0) &&
         "Vector type should have an even number of elements in each lane");
  for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
    for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
      // Ignore undefined components, and components that come from the
      // missing source (a mask index into a source that was dropped above).
      int LIdx = LMask[i + j], RIdx = RMask[i + j];
      if (LIdx < 0 || RIdx < 0 ||
          (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
          (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
        continue;

      // The pair must be adjacent with the even index on the LHS; the
      // swapped form is only acceptable for commutative ops.
      if (!((RIdx & 1) == 1 && (LIdx + 1) == RIdx) &&
          !((LIdx & 1) == 1 && (RIdx + 1) == LIdx && IsCommutative))
        return false;

      // Compute the index of this pair's result in the horizontal op output.
      int Base = LIdx & ~1u;
      int Index = ((Base % NumEltsPer128BitChunk) / 2) +
                  ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1));

      // Pairs from the second source (or the high half when there is only
      // one source) land in the upper 64 bits of each lane.
      if ((B && Base >= (int)NumElts) || (!B && i >= NumEltsPer64BitChunk))
        Index += NumEltsPer64BitChunk;
      PostShuffleMask[i + j] = Index;
    }
  }

  // If only one source survived, use it for both horizontal op inputs.
  SDValue NewLHS = A.getNode() ? A : B;
  SDValue NewRHS = B.getNode() ? B : A;

  bool IsIdentityPostShuffle =
      isSequentialOrUndefInRange(PostShuffleMask, 0, NumElts, 0);
  if (IsIdentityPostShuffle)
    PostShuffleMask.clear();

  // Avoid cross-lane post-shuffles on FP types without AVX2 (no cheap
  // cross-lane shuffle available there).
  if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
      isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask))
    return false;

  // If both inputs already feed another identical horizontal op, forming one
  // more is free of extra register pressure, so force it regardless of the
  // usual profitability heuristic.
  bool FoundHorizLHS = llvm::any_of(NewLHS->uses(), [&](SDNode *User) {
    return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
  });
  bool FoundHorizRHS = llvm::any_of(NewRHS->uses(), [&](SDNode *User) {
    return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
  });
  bool ForceHorizOp = FoundHorizLHS && FoundHorizRHS;

  // Otherwise defer to the target heuristic for whether the horizontal form
  // is worthwhile.
  if (!ForceHorizOp &&
      !shouldUseHorizontalOp(NewLHS == NewRHS &&
                                 (NumShuffles < 2 || !IsIdentityPostShuffle),
                             DAG, Subtarget))
    return false;

  LHS = DAG.getBitcast(VT, NewLHS);
  RHS = DAG.getBitcast(VT, NewRHS);
  return true;
}
47330 | |
47331 | |
/// Try to fold an integer or FP add/sub whose operands form the interleaved
/// shuffle pattern required by the x86 horizontal ops into an
/// X86ISD::HADD/HSUB (integer) or X86ISD::FHADD/FHSUB (FP) node, optionally
/// followed by a shuffle supplied by isHorizontalBinOp.
static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  unsigned Opcode = N->getOpcode();
  // FADD/ADD map to the horizontal-add opcodes; FSUB/SUB to horizontal-sub.
  bool IsAdd = (Opcode == ISD::FADD) || (Opcode == ISD::ADD);
  // Filled in by isHorizontalBinOp when a post-op shuffle is needed.
  SmallVector<int, 8> PostShuffleMask;

  switch (Opcode) {
  case ISD::FADD:
  case ISD::FSUB:
    // FP horizontal ops: 128-bit types need SSE3, 256-bit types need AVX.
    if ((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
        (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB;
      // isHorizontalBinOp may rewrite LHS/RHS and populate PostShuffleMask.
      if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
                            PostShuffleMask)) {
        SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
        // Re-apply any shuffle that must follow the horizontal op.
        if (!PostShuffleMask.empty())
          HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                            DAG.getUNDEF(VT), PostShuffleMask);
        return HorizBinOp;
      }
    }
    break;
  case ISD::ADD:
  case ISD::SUB:
    // Integer horizontal ops require SSSE3 (PHADD/PHSUB families).
    if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
                                 VT == MVT::v16i16 || VT == MVT::v8i32)) {
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB;
      if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
                            PostShuffleMask)) {
        auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL,
                                        ArrayRef<SDValue> Ops) {
          return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
        };
        // NOTE(review): SplitOpsAndApply presumably splits wide vectors into
        // legal-width pieces before applying HOpBuilder — confirm in helper.
        SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
                                              {LHS, RHS}, HOpBuilder);
        if (!PostShuffleMask.empty())
          HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                            DAG.getUNDEF(VT), PostShuffleMask);
        return HorizBinOp;
      }
    }
    break;
  }

  return SDValue();
}
47383 | |
47384 | |
47385 | |
47386 | |
47387 | |
47388 | |
47389 | |
47390 | |
47391 | |
47392 | |
47393 | |
47394 | |
47395 | |
47396 | |
/// Fold a complex multiply whose operand is an explicit conjugation
/// (XOR with the imaginary-part sign-bit constant) into the opposite
/// complex-multiply opcode: VFMULC(conj(x), y) -> VFCMULC(y, x) and
/// VFCMULC(conj(x), y) -> VFMULC(y, x). Tries both operand orders.
static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  // Folding a conjugation toggles between the MULC and CMULC opcodes.
  int CombineOpcode =
      N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;
  // Returns true iff the constant is the sign-bit mask of the imaginary
  // lane: 0x80000000 per 32-bit pair (i.e. -0.0 for the float element).
  auto isConjugationConstant = [](const Constant *c) {
    if (const auto *CI = dyn_cast<ConstantInt>(c)) {
      APInt ConjugationInt32 = APInt(32, 0x80000000, true);
      APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);
      switch (CI->getBitWidth()) {
      case 16:
        // A 16-bit constant cannot cover a full complex (2 x f16) element.
        return false;
      case 32:
        return CI->getValue() == ConjugationInt32;
      case 64:
        return CI->getValue() == ConjugationInt64;
      default:
        llvm_unreachable("Unexpected bit width");
      }
    }
    if (const auto *CF = dyn_cast<ConstantFP>(c))
      return CF->isNegativeZeroValue();
    return false;
  };
  // Match LHS == bitcast(xor(x, broadcast-load(conjugation constant))) and,
  // on success, build the flipped opcode with operands (RHS, x) into `r`.
  auto combineConjugation = [&](SDValue &r) {
    if (LHS->getOpcode() == ISD::BITCAST && RHS.hasOneUse()) {
      SDValue XOR = LHS.getOperand(0);
      if (XOR->getOpcode() == ISD::XOR && XOR.hasOneUse()) {
        SDValue XORRHS = XOR.getOperand(1);
        // Look through a bitcast around the broadcast-load.
        if (XORRHS.getOpcode() == ISD::BITCAST && XORRHS.hasOneUse())
          XORRHS = XORRHS.getOperand(0);
        if (XORRHS.getOpcode() == X86ISD::VBROADCAST_LOAD &&
            XORRHS.getOperand(1).getNumOperands()) {
          // The broadcast pointer operand must wrap a constant-pool entry
          // holding the conjugation constant.
          ConstantPoolSDNode *CP =
              dyn_cast<ConstantPoolSDNode>(XORRHS.getOperand(1).getOperand(0));
          if (CP && isConjugationConstant(CP->getConstVal())) {
            // Preserve N's fast-math flags on the nodes created below.
            SelectionDAG::FlagInserter FlagsInserter(DAG, N);
            SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));
            SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);
            r = DAG.getBitcast(VT, FCMulC);
            return true;
          }
        }
      }
    }
    return false;
  };
  SDValue Res;
  if (combineConjugation(Res))
    return Res;
  // The conjugation may be on either operand; retry with them swapped.
  std::swap(LHS, RHS);
  if (combineConjugation(Res))
    return Res;
  // Res is still an empty SDValue here, signalling "no fold".
  return Res;
}
47454 | |
47455 | |
47456 | |
47457 | |
47458 | |
47459 | |
47460 | |
47461 | |
47462 | |
47463 | |
/// Fold FADD(acc, bitcast(VFMULC/VFCMULC(a, b))) into a single fused
/// complex multiply-add (X86ISD::VFMADDC / VFCMADDC) when FP16 complex
/// instructions are available and FP contraction is allowed.
static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  // Contraction is allowed either globally (fp-contract=fast) or via the
  // node's own AllowContract flag.
  auto AllowContract = [&DAG](SDNode *N) {
    return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
           N->getFlags().hasAllowContract();
  };
  if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() || !AllowContract(N))
    return SDValue();

  // Only the f16 vector types that hold complex (f16,f16) pairs qualify.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CFmul, FAddOp1;
  // Match one FADD operand as bitcast(VFMULC/VFCMULC) with single uses and
  // contraction allowed on the multiply; stores the multiply in CFmul.
  auto GetCFmulFrom = [&CFmul, &AllowContract](SDValue N) -> bool {
    if (!N.hasOneUse() || N.getOpcode() != ISD::BITCAST)
      return false;
    SDValue Op0 = N.getOperand(0);
    unsigned Opcode = Op0.getOpcode();
    if (Op0.hasOneUse() && AllowContract(Op0.getNode()) &&
        (Opcode == X86ISD::VFMULC || Opcode == X86ISD::VFCMULC))
      CFmul = Op0;
    return !!CFmul;
  };

  // The complex multiply may be either addend.
  if (GetCFmulFrom(LHS))
    FAddOp1 = RHS;
  else if (GetCFmulFrom(RHS))
    FAddOp1 = LHS;
  else
    return SDValue();

  // Complex ops operate on f32 pairs: half as many elements, twice as wide.
  MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2);
  assert(CFmul->getValueType(0) == CVT && "Complex type mismatch");
  FAddOp1 = DAG.getBitcast(CVT, FAddOp1);
  // Keep the conjugation behavior of the original multiply in the FMA form.
  unsigned newOp = CFmul.getOpcode() == X86ISD::VFMULC ? X86ISD::VFMADDC
                                                       : X86ISD::VFCMADDC;

  // VFMADDC/VFCMADDC take (accumulator, a, b); carry over N's flags.
  CFmul = DAG.getNode(newOp, SDLoc(N), CVT, FAddOp1, CFmul.getOperand(0),
                      CFmul.getOperand(1), N->getFlags());
  return DAG.getBitcast(VT, CFmul);
}
47509 | |
47510 | |
47511 | static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, |
47512 | const X86Subtarget &Subtarget) { |
47513 | if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget)) |
47514 | return HOp; |
47515 | |
47516 | if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget)) |
47517 | return COp; |
47518 | |
47519 | return SDValue(); |
47520 | } |
47521 | |
47522 | |
47523 | |
47524 | |
47525 | |
47526 | |
/// Push a truncate through a binary arithmetic source node:
/// trunc(binop(x, y)) -> binop(trunc(x), trunc(y)), when the narrower op is
/// legal and the truncations of the operands are free (they are extensions
/// from a type no wider than the result, or constant build-vectors).
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
                                          const X86Subtarget &Subtarget,
                                          const SDLoc &DL) {
  assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
  SDValue Src = N->getOperand(0);
  unsigned SrcOpcode = Src.getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  // Truncating Op is free if it only undoes an extension from a type no
  // wider than VT, or if Op is a constant vector (constants re-fold).
  auto IsFreeTruncation = [VT](SDValue Op) {
    unsigned TruncSizeInBits = VT.getScalarSizeInBits();

    unsigned Opcode = Op.getOpcode();
    if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND ||
         Opcode == ISD::ZERO_EXTEND) &&
        Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
      return true;

    return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
  };

  // Build binop(trunc(N0), trunc(N1)) in the narrow type VT.
  auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
    SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);
  };

  // Don't duplicate the arithmetic if the source has other users.
  if (!Src.hasOneUse())
    return SDValue();

  // Only vector truncations are handled here.
  if (!VT.isVector())
    return SDValue();

  switch (SrcOpcode) {
  case ISD::MUL:
    // i64 vector multiplies are expensive; narrow unconditionally when the
    // narrow multiply is legal but the wide one is not.
    if (SrcVT.getScalarType() == MVT::i64 &&
        TLI.isOperationLegal(SrcOpcode, VT) &&
        !TLI.isOperationLegal(SrcOpcode, SrcVT))
      return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
    LLVM_FALLTHROUGH;
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
  case ISD::ADD:
  case ISD::SUB: {
    SDValue Op0 = Src.getOperand(0);
    SDValue Op1 = Src.getOperand(1);
    // Narrow when the op is legal at VT and at least one operand truncates
    // for free (or both operands are the same value).
    if (TLI.isOperationLegal(SrcOpcode, VT) &&
        (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
      return TruncateArithmetic(Op0, Op1);
    break;
  }
  }

  return SDValue();
}
47600 | |
47601 | |
47602 | |
47603 | |
47604 | |
47605 | static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL, |
47606 | const X86Subtarget &Subtarget, |
47607 | SelectionDAG &DAG) { |
47608 | SDValue In = N->getOperand(0); |
47609 | EVT InVT = In.getValueType(); |
47610 | EVT OutVT = N->getValueType(0); |
47611 | |
47612 | APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(), |
47613 | OutVT.getScalarSizeInBits()); |
47614 | In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT)); |
47615 | return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget); |
47616 | } |
47617 | |
47618 | |
47619 | static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL, |
47620 | const X86Subtarget &Subtarget, |
47621 | SelectionDAG &DAG) { |
47622 | SDValue In = N->getOperand(0); |
47623 | EVT InVT = In.getValueType(); |
47624 | EVT OutVT = N->getValueType(0); |
47625 | In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In, |
47626 | DAG.getValueType(OutVT)); |
47627 | return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget); |
47628 | } |
47629 | |
47630 | |
47631 | |
47632 | |
47633 | |
47634 | |
/// Lower wide-to-narrow integer vector truncations (vXi16/vXi32/vXi64 down
/// to vXi8/vXi16) through PACKUS/PACKSS on pre-AVX512 SSE2+ targets, where
/// no native vector truncate instruction exists.
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
  EVT OutVT = N->getValueType(0);
  if (!OutVT.isVector())
    return SDValue();

  SDValue In = N->getOperand(0);
  if (!In.getValueType().isSimple())
    return SDValue();

  EVT InVT = In.getValueType();
  unsigned NumElems = OutVT.getVectorNumElements();

  // PACK lowering needs SSE2; AVX512 has native truncates, so skip there.
  if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
    return SDValue();

  // Restrict to the element-type pairs and power-of-2 sizes (>= 8 elts)
  // that the PACK-based lowering handles.
  EVT OutSVT = OutVT.getVectorElementType();
  EVT InSVT = InVT.getVectorElementType();
  if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
        NumElems >= 8))
    return SDValue();

  // NOTE(review): with SSSE3 available, 8-element i16/i32 sources are left
  // for other lowering (presumably a shuffle-based path is cheaper there) —
  // confirm against the PSHUFB lowering elsewhere in this file.
  if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64)
    return SDValue();

  SDLoc DL(N);

  // SSE41 (and any i8 result) uses the PACKUS form; otherwise, for i32
  // sources, fall back to the PACKSS form.
  if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
    return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG);
  if (InSVT == MVT::i32)
    return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG);

  return SDValue();
}
47674 | |
47675 | |
47676 | |
47677 | |
/// Lower a vector truncate through PACKSS/PACKUS when known-bits /
/// sign-bits analysis proves the pack's saturation cannot change the value
/// (enough leading zero bits for PACKUS, enough sign bits for PACKSS).
static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
                                               SelectionDAG &DAG,
                                               const X86Subtarget &Subtarget) {
  // PACKSS/PACKUS require SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
    return SDValue();

  SDValue In = N->getOperand(0);
  if (!In.getValueType().isSimple())
    return SDValue();

  MVT VT = N->getValueType(0).getSimpleVT();
  MVT SVT = VT.getScalarType();

  MVT InVT = In.getValueType().getSimpleVT();
  MVT InSVT = InVT.getScalarType();

  // Only pow-2 element counts and the element-size steps PACK can produce.
  if (!isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();
  if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
    return SDValue();
  if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
    return SDValue();

  // Sub-128-bit i32 results aren't worth the pack sequence.
  if (SVT == MVT::i32 && VT.getSizeInBits() < 128)
    return SDValue();

  // AVX512 has native truncates, so usually skip — except for 512->256-bit
  // truncates when 512-bit registers are disallowed, which must be split.
  if (Subtarget.hasAVX512() &&
      !(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
        InVT.is512BitVector())) {
    // Even on AVX512, prefer PACK for small (<=128-bit) results whose
    // source is a concatenation, so the concat can be folded away.
    SmallVector<SDValue> ConcatOps;
    if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
      return SDValue();
  }

  // PACKSS saturates to 16 bits max; PACKUSWB (pre-SSE41) only to 8 bits.
  unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
  unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;

  // If enough upper bits are known zero, PACKUS cannot saturate: use it.
  KnownBits Known = DAG.computeKnownBits(In);
  unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
  if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
    return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);

  unsigned NumSignBits = DAG.ComputeNumSignBits(In);

  // For i32 results, only fold when every source bit is a sign bit
  // (i.e. the value is 0 or -1), since PACKSSDW saturates to 16 bits.
  if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
    return SDValue();

  // Enough sign bits means PACKSS cannot saturate: use it.
  unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
  if (NumSignBits > MinSignBits)
    return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);

  // A logical shift right by exactly MinSignBits leaves a value whose
  // arithmetic-shift counterpart truncates identically, so rewrite SRL as
  // SRA to enable the PACKSS path.
  if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
    if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
            In, APInt::getAllOnesValue(VT.getVectorNumElements()))) {
      if (*ShAmt == MinSignBits) {
        SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
        return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
                                      Subtarget);
      }
    }

  return SDValue();
}
47761 | |
47762 | |
47763 | |
47764 | |
47765 | |
47766 | |
47767 | |
47768 | |
/// Match trunc(srl(mul(ext(a), ext(b)), 16)) where a and b are vXi16 and
/// the extensions agree, and turn it into MULHS/MULHU (the high half of a
/// 16-bit multiply, i.e. PMULHW/PMULHUW).
static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
                            SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  // The truncate source must be a right shift of a multiply.
  if (Src.getOpcode() != ISD::SRL ||
      Src.getOperand(0).getOpcode() != ISD::MUL)
    return SDValue();

  if (!Subtarget.hasSSE2())
    return SDValue();

  // Only vXi16 results map onto PMULH(U)W.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)
    return SDValue();

  // The widened multiply must be at least 32 bits per element so the high
  // 16 bits of the product are actually computed.
  EVT InVT = Src.getValueType();
  if (InVT.getVectorElementType().getSizeInBits() < 32)
    return SDValue();

  // The shift amount must be a splat of exactly 16.
  APInt ShiftAmt;
  if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) ||
      ShiftAmt != 16)
    return SDValue();

  SDValue LHS = Src.getOperand(0).getOperand(0);
  SDValue RHS = Src.getOperand(0).getOperand(1);

  // Both multiply operands must be the same kind of extension; the kind
  // picks signed vs. unsigned high-multiply below.
  unsigned ExtOpc = LHS.getOpcode();
  if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
      RHS.getOpcode() != ExtOpc)
    return SDValue();

  // Peel the extensions off.
  LHS = LHS.getOperand(0);
  RHS = RHS.getOperand(0);

  // The unextended operands must already be the i16 result type.
  if (LHS.getValueType() != VT || RHS.getValueType() != VT)
    return SDValue();

  unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
  return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
47814 | |
47815 | |
47816 | |
47817 | |
47818 | |
47819 | |
47820 | |
47821 | |
47822 | |
/// Match a signed-saturated add of two multiplies of a zero-extended i8
/// vector with a sign-extended i8 vector, where the elements are paired
/// even/odd, and turn it into X86ISD::VPMADDUBSW (PMADDUBSW).
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget,
                               const SDLoc &DL) {
  // PMADDUBSW requires SSSE3.
  if (!VT.isVector() || !Subtarget.hasSSSE3())
    return SDValue();

  // Result must be vXi16 with a power-of-2 count of at least 8 elements.
  unsigned NumElems = VT.getVectorNumElements();
  EVT ScalarVT = VT.getVectorElementType();
  if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems))
    return SDValue();

  // The truncate input must be a signed-saturation pattern wrapping an add.
  SDValue SSatVal = detectSSatPattern(In, VT);
  if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
    return SDValue();

  // Both addends must be multiplies.
  SDValue N0 = SSatVal.getOperand(0);
  SDValue N1 = SSatVal.getOperand(1);

  if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  SDValue N10 = N1.getOperand(0);
  SDValue N11 = N1.getOperand(1);

  // Canonicalize so the zero-extend (if present) is the first operand of
  // each multiply.
  if (N01.getOpcode() == ISD::ZERO_EXTEND)
    std::swap(N00, N01);
  if (N11.getOpcode() == ISD::ZERO_EXTEND)
    std::swap(N10, N11);

  // Each multiply must be zext * sext — the unsigned-by-signed operand
  // shape PMADDUBSW implements.
  if (N00.getOpcode() != ISD::ZERO_EXTEND ||
      N01.getOpcode() != ISD::SIGN_EXTEND ||
      N10.getOpcode() != ISD::ZERO_EXTEND ||
      N11.getOpcode() != ISD::SIGN_EXTEND)
    return SDValue();

  // Peel the extensions off.
  N00 = N00.getOperand(0);
  N01 = N01.getOperand(0);
  N10 = N10.getOperand(0);
  N11 = N11.getOperand(0);

  // All four sources must be i8 vectors.
  if (N00.getValueType().getVectorElementType() != MVT::i8 ||
      N01.getValueType().getVectorElementType() != MVT::i8 ||
      N10.getValueType().getVectorElementType() != MVT::i8 ||
      N11.getValueType().getVectorElementType() != MVT::i8)
    return SDValue();

  // Each source must be a build_vector of extracted elements so we can
  // verify the even/odd pairing below.
  if (N00.getOpcode() != ISD::BUILD_VECTOR ||
      N01.getOpcode() != ISD::BUILD_VECTOR ||
      N10.getOpcode() != ISD::BUILD_VECTOR ||
      N11.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // For each result element i, the first multiply must use elements 2*i and
  // the second 2*i+1 (or vice versa), all drawn from one common zext source
  // and one common sext source.
  SDValue ZExtIn, SExtIn;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue N00Elt = N00.getOperand(i);
    SDValue N01Elt = N01.getOperand(i);
    SDValue N10Elt = N10.getOperand(i);
    SDValue N11Elt = N11.getOperand(i);

    // Every lane must be an extract with a constant index.
    if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
    auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
    auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
    auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
    if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
      return SDValue();
    unsigned IdxN00 = ConstN00Elt->getZExtValue();
    unsigned IdxN01 = ConstN01Elt->getZExtValue();
    unsigned IdxN10 = ConstN10Elt->getZExtValue();
    unsigned IdxN11 = ConstN11Elt->getZExtValue();

    // Addition is commutative: normalize so the even index comes first.
    if (IdxN00 > IdxN10) {
      std::swap(IdxN00, IdxN10);
      std::swap(IdxN01, IdxN11);
    }

    // Indices must pair up as (2*i, 2*i+1) in both multiplies.
    if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
        IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
      return SDValue();
    SDValue N00In = N00Elt.getOperand(0);
    SDValue N01In = N01Elt.getOperand(0);
    SDValue N10In = N10Elt.getOperand(0);
    SDValue N11In = N11Elt.getOperand(0);

    // Record the common sources on the first lane...
    if (!ZExtIn) {
      ZExtIn = N00In;
      SExtIn = N01In;
    }
    // ...and require every later lane to use the same two vectors.
    if (ZExtIn != N00In || SExtIn != N01In ||
        ZExtIn != N10In || SExtIn != N11In)
      return SDValue();
  }

  // Emit VPMADDUBSW: i8 inputs produce half as many i16 results.
  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT InVT = Ops[0].getValueType();
    assert(InVT.getScalarType() == MVT::i8 &&
           "Unexpected scalar element type");
    assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 InVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
  };
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
                          PMADDBuilder);
}
47953 | |
/// DAG combine for ISD::TRUNCATE: try the specialized truncate folds in
/// priority order, ending with the generic PACK-based vector lowering.
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  SDLoc DL(N);

  // Push the truncate through cheap binary arithmetic.
  if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
    return V;

  // trunc of an averaging pattern -> PAVG.
  if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
    return Avg;

  // trunc of ssat(add(mul(zext i8, sext i8))) -> PMADDUBSW.
  if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
    return PMAdd;

  // Saturating truncation patterns.
  if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
    return Val;

  // trunc(srl(mul(ext, ext), 16)) -> PMULH(U)W.
  if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))
    return V;

  // truncate(bitcast(x86mmx)) to i32 maps directly onto MOVD.
  if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
    SDValue BCSrc = Src.getOperand(0);
    if (BCSrc.getValueType() == MVT::x86mmx)
      return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
  }

  // PACK-based lowering when known/sign bits prove saturation is a no-op.
  if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
    return V;

  // Fallback: mask/extend + PACKUS/PACKSS lowering.
  return combineVectorTruncation(N, DAG, Subtarget);
}
47994 | |
47995 | static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG, |
47996 | TargetLowering::DAGCombinerInfo &DCI) { |
47997 | EVT VT = N->getValueType(0); |
47998 | SDValue In = N->getOperand(0); |
47999 | SDLoc DL(N); |
48000 | |
48001 | if (auto SSatVal = detectSSatPattern(In, VT)) |
48002 | return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); |
48003 | if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) |
48004 | return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); |
48005 | |
48006 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48007 | APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits())); |
48008 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) |
48009 | return SDValue(N, 0); |
48010 | |
48011 | return SDValue(); |
48012 | } |
48013 | |
48014 | |
48015 | |
48016 | |
48017 | |
48018 | |
48019 | |
48020 | |
48021 | |
48022 | |
/// Return the value whose negation N computes, or an empty SDValue if N is
/// not recognizable as an FNEG. Looks through bitcasts, single-source
/// shuffles, inserts into undef, and sign-bit XOR / FSUB-from-signmask
/// patterns, recursing up to SelectionDAG::MaxRecursionDepth.
static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
  if (N->getOpcode() == ISD::FNEG)
    return N->getOperand(0);

  // Bail out of deep recursion.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();

  SDValue Op = peekThroughBitcasts(SDValue(N, 0));
  EVT VT = Op->getValueType(0);

  // Only look through bitcasts that keep the element width, so the
  // per-element sign-bit reasoning below stays valid.
  if (VT.getScalarSizeInBits() != ScalarSize)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  case ISD::VECTOR_SHUFFLE: {
    // shuffle(fneg(x), undef) -> fneg(shuffle(x, undef)): hoist the
    // negation out of a single-source shuffle.
    if (!Op.getOperand(1).isUndef())
      return SDValue();
    if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1))
      if (NegOp0.getValueType() == VT)
        return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT),
                                    cast<ShuffleVectorSDNode>(Op)->getMask());
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // insert(undef, fneg(x), i) -> fneg(insert(undef, x, i)): hoist the
    // negation out of an insert into an undef vector.
    SDValue InsVector = Op.getOperand(0);
    SDValue InsVal = Op.getOperand(1);
    if (!InsVector.isUndef())
      return SDValue();
    if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1))
      if (NegInsVal.getValueType() == VT.getVectorElementType())
        return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
                           NegInsVal, Op.getOperand(2));
    break;
  }
  case ISD::FSUB:
  case ISD::XOR:
  case X86ISD::FXOR: {
    SDValue Op1 = Op.getOperand(1);
    SDValue Op0 = Op.getOperand(0);

    // For XOR/FXOR the sign-mask constant is expected in operand 1; for
    // FSUB (signmask - x == fneg(x) up to signed zeros) it is operand 0,
    // so swap to examine it uniformly as Op1.
    if (Opc == ISD::FSUB)
      std::swap(Op0, Op1);

    APInt UndefElts;
    SmallVector<APInt, 16> EltBits;

    // The fold is valid only if every defined element of the constant is
    // exactly the sign-bit mask.
    if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits,
                                      true,
                                      false)) {
      for (unsigned I = 0, E = EltBits.size(); I < E; I++)
        if (!UndefElts[I] && !EltBits[I].isSignMask())
          return SDValue();

      return peekThroughBitcasts(Op0);
    }
  }
  }

  return SDValue();
}
48097 | |
/// Given an FMA-family opcode, return the opcode with the requested
/// negations applied: NegMul negates the product, NegAcc negates the
/// accumulator (the addend), and NegRes negates the whole result.
/// Each flag toggles between the matching opcode pair; llvm_unreachable
/// fires if the opcode doesn't support the requested negation.
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
                                bool NegRes) {
  // Toggle the product sign: FMADD <-> FNMADD, FMSUB <-> FNMSUB (plus the
  // strict and rounding variants).
  if (NegMul) {
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case ISD::FMA: Opcode = X86ISD::FNMADD; break;
    case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FNMADD; break;
    case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break;
    case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
    case X86ISD::STRICT_FMSUB: Opcode = X86ISD::STRICT_FNMSUB; break;
    case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
    case X86ISD::FNMADD: Opcode = ISD::FMA; break;
    case X86ISD::STRICT_FNMADD: Opcode = ISD::STRICT_FMA; break;
    case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
    case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
    case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FMSUB; break;
    case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
    }
  }

  // Toggle the accumulator sign: *ADD <-> *SUB, ADDSUB <-> SUBADD.
  if (NegAcc) {
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case ISD::FMA: Opcode = X86ISD::FMSUB; break;
    case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FMSUB; break;
    case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
    case X86ISD::FMSUB: Opcode = ISD::FMA; break;
    case X86ISD::STRICT_FMSUB: Opcode = ISD::STRICT_FMA; break;
    case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
    case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
    case X86ISD::STRICT_FNMADD: Opcode = X86ISD::STRICT_FNMSUB; break;
    case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
    case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
    case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FNMADD; break;
    case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
    case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break;
    case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break;
    case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break;
    case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break;
    }
  }

  // Negate the overall result: flips both the product and accumulator
  // signs at once. Note: no strict variants here — strict FP nodes do not
  // take this path.
  if (NegRes) {
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case ISD::FMA: Opcode = X86ISD::FNMSUB; break;
    case X86ISD::FMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
    case X86ISD::FMSUB: Opcode = X86ISD::FNMADD; break;
    case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
    case X86ISD::FNMADD: Opcode = X86ISD::FMSUB; break;
    case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
    case X86ISD::FNMSUB: Opcode = ISD::FMA; break;
    case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
    }
  }

  return Opcode;
}
48157 | |
48158 | |
/// DAG combine for nodes that compute a floating-point negation (as
/// recognized by isFNEG): fold fneg(fmul) into FNMSUB when FMA is
/// available, otherwise ask the target's getNegatedExpression for a
/// cheaper negated form.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
  EVT OrigVT = N->getValueType(0);
  // Arg is the value being negated (isFNEG looks through bitcasts, so its
  // type may differ from OrigVT).
  SDValue Arg = isFNEG(DAG, N);
  if (!Arg)
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = Arg.getValueType();
  EVT SVT = VT.getScalarType();
  SDLoc DL(N);

  // Don't create nodes in types that won't survive legalization.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // fneg(a * b) -> FNMSUB(a, b, 0) = -(a*b) - 0. Requires no-signed-zeros
  // because -(0*0) is -0.0 but FNMSUB yields +0.0 for the same inputs.
  if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
      Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
    SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
    SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
                                  Arg.getOperand(1), Zero);
    return DAG.getBitcast(OrigVT, NewNode);
  }

  // Otherwise, see if the target can produce Arg's negation more cheaply
  // than emitting an explicit sign-flip.
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOperations = !DCI.isBeforeLegalizeOps();
  if (SDValue NegArg =
          TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize))
    return DAG.getBitcast(OrigVT, NegArg);

  return SDValue();
}
48195 | |
/// X86 override of getNegatedExpression: returns a negated form of Op and
/// reports via Cost whether it is cheaper than the default sign-flip.
/// Handles values already recognizable as FNEG, the FMA family (by
/// opcode remapping), and FRCP, then defers to the generic implementation.
SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                               bool LegalOperations,
                                               bool ForCodeSize,
                                               NegatibleCost &Cost,
                                               unsigned Depth) const {
  // If Op already computes a negation, its operand is the free negated form.
  if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) {
    Cost = NegatibleCost::Cheaper;
    return DAG.getBitcast(Op.getValueType(), Arg);
  }

  EVT VT = Op.getValueType();
  EVT SVT = VT.getScalarType();
  unsigned Opc = Op.getOpcode();
  SDNodeFlags Flags = Op.getNode()->getFlags();
  switch (Opc) {
  case ISD::FMA:
  case X86ISD::FMSUB:
  case X86ISD::FNMADD:
  case X86ISD::FNMSUB:
  case X86ISD::FMADD_RND:
  case X86ISD::FMSUB_RND:
  case X86ISD::FNMADD_RND:
  case X86ISD::FNMSUB_RND: {
    // Only fold single-use FMAs on FMA-capable targets with legal f32/f64.
    if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
        !(SVT == MVT::f32 || SVT == MVT::f64) ||
        !isOperationLegal(ISD::FMA, VT))
      break;

    // Flipping FMA signs can change the sign of a zero result, so require
    // the no-signed-zeros flag.
    if (!Flags.hasNoSignedZeros())
      break;

    // Probe each of the three operands for a cheaper negated form; an
    // empty SDValue means "keep the original operand".
    SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
    for (int i = 0; i != 3; ++i)
      NewOps[i] = getCheaperNegatedExpression(
          Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1);

    bool NegA = !!NewOps[0];
    bool NegB = !!NewOps[1];
    bool NegC = !!NewOps[2];
    // Negating both multiplicands cancels (NegA != NegB); the final `true`
    // accounts for negating the whole expression.
    unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true);

    // Cheaper if any operand's negation was itself cheaper; otherwise the
    // opcode swap is cost-neutral.
    Cost = (NegA || NegB || NegC) ? NegatibleCost::Cheaper
                                  : NegatibleCost::Neutral;

    // Fill the operands that had no cheaper negated form (this also copies
    // any trailing non-arithmetic operands, e.g. the rounding control).
    for (int i = 0, e = Op.getNumOperands(); i != e; ++i)
      if (!NewOps[i])
        NewOps[i] = Op.getOperand(i);
    return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps);
  }
  case X86ISD::FRCP:
    // -(1/x) == 1/(-x): push the negation into FRCP's operand.
    if (SDValue NegOp0 =
            getNegatedExpression(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Cost, Depth + 1))
      return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0);
    break;
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
                                              ForCodeSize, Cost, Depth);
}
48262 | |
48263 | static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, |
48264 | const X86Subtarget &Subtarget) { |
48265 | MVT VT = N->getSimpleValueType(0); |
48266 | |
48267 | if (!VT.isVector() || !Subtarget.hasSSE2()) |
48268 | return SDValue(); |
48269 | |
48270 | SDLoc dl(N); |
48271 | |
48272 | unsigned IntBits = VT.getScalarSizeInBits(); |
48273 | MVT IntSVT = MVT::getIntegerVT(IntBits); |
48274 | MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits); |
48275 | |
48276 | SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0)); |
48277 | SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); |
48278 | unsigned IntOpcode; |
48279 | switch (N->getOpcode()) { |
48280 | default: llvm_unreachable("Unexpected FP logic op"); |
48281 | case X86ISD::FOR: IntOpcode = ISD::OR; break; |
48282 | case X86ISD::FXOR: IntOpcode = ISD::XOR; break; |
48283 | case X86ISD::FAND: IntOpcode = ISD::AND; break; |
48284 | case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; |
48285 | } |
48286 | SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); |
48287 | return DAG.getBitcast(VT, IntOp); |
48288 | } |
48289 | |
48290 | |
48291 | |
48292 | static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { |
48293 | if (N->getOpcode() != ISD::XOR) |
48294 | return SDValue(); |
48295 | |
48296 | SDValue LHS = N->getOperand(0); |
48297 | if (!isOneConstant(N->getOperand(1)) || LHS->getOpcode() != X86ISD::SETCC) |
48298 | return SDValue(); |
48299 | |
48300 | X86::CondCode NewCC = X86::GetOppositeBranchCondition( |
48301 | X86::CondCode(LHS->getConstantOperandVal(0))); |
48302 | SDLoc DL(N); |
48303 | return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); |
48304 | } |
48305 | |
/// DAG combine for ISD::XOR: tries a sequence of X86-specific xor folds, then
/// falls back to the FNEG combine.
static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // On SSE1-only targets v4i32 xor would be scalarized; do it as an FP xor
  // on v4f32 instead (bitwise identical result).
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
    return DAG.getBitcast(MVT::v4i32,
                          DAG.getNode(X86ISD::FXOR, SDLoc(N), MVT::v4f32,
                                      DAG.getBitcast(MVT::v4f32, N0),
                                      DAG.getBitcast(MVT::v4f32, N1)));
  }

  if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
    return Cmp;

  if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
    return R;

  // Everything below only fires after operation legalization.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue SetCC = foldXor1SetCC(N, DAG))
    return SetCC;

  if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
    return RV;

  // Fold not(iX bitcast(vXi1 V)) -> (iX bitcast(not(V))) for legal bool
  // vectors, so the NOT happens in the mask domain.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (llvm::isAllOnesConstant(N1) && N0.getOpcode() == ISD::BITCAST &&
      N0.getOperand(0).getValueType().isVector() &&
      N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
      TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) {
    return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0),
                                         N0.getOperand(0).getValueType()));
  }

  // Fold not(insert_subvector(undef, V)) -> insert_subvector(undef, not(V))
  // for vXi1 vectors when the subvector type is legal. Note the outer undef
  // operand is reused unchanged.
  if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() &&
      VT.getVectorElementType() == MVT::i1 &&
      N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() &&
      TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
    return DAG.getNode(
        ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
        DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()),
        N0.getOperand(2));
  }

  // Fold xor(truncate/zext(xor(x, C1)), C2) so that both constants end up in
  // one xor: the inner xor is re-materialized at the outer type and grouped
  // with C2, letting constant folding merge them.
  if ((N0.getOpcode() == ISD::TRUNCATE || N0.getOpcode() == ISD::ZERO_EXTEND) &&
      N0.getOperand(0).getOpcode() == N->getOpcode()) {
    SDValue TruncExtSrc = N0.getOperand(0);
    auto *N1C = dyn_cast<ConstantSDNode>(N1);
    auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.getOperand(1));
    // Opaque constants must not be folded.
    if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) {
      SDLoc DL(N);
      SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT);
      SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, LHS,
                         DAG.getNode(ISD::XOR, DL, VT, RHS, N1));
    }
  }

  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
    return FPLogic;

  // Last resort: an xor with the sign-bit mask may really be an FNEG.
  return combineFneg(N, DAG, DCI, Subtarget);
}
48380 | |
48381 | static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, |
48382 | TargetLowering::DAGCombinerInfo &DCI, |
48383 | const X86Subtarget &Subtarget) { |
48384 | EVT VT = N->getValueType(0); |
48385 | unsigned NumBits = VT.getSizeInBits(); |
48386 | |
48387 | |
48388 | |
48389 | |
48390 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48391 | APInt DemandedMask(APInt::getAllOnesValue(NumBits)); |
48392 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) |
48393 | return SDValue(N, 0); |
48394 | |
48395 | return SDValue(); |
48396 | } |
48397 | |
48398 | static bool isNullFPScalarOrVectorConst(SDValue V) { |
48399 | return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode()); |
48400 | } |
48401 | |
48402 | |
48403 | |
48404 | |
48405 | |
48406 | |
48407 | |
48408 | static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG, |
48409 | const X86Subtarget &Subtarget) { |
48410 | if (!isNullFPScalarOrVectorConst(V)) |
48411 | return SDValue(); |
48412 | |
48413 | if (V.getValueType().isVector()) |
48414 | return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V)); |
48415 | |
48416 | return V; |
48417 | } |
48418 | |
48419 | static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG, |
48420 | const X86Subtarget &Subtarget) { |
48421 | SDValue N0 = N->getOperand(0); |
48422 | SDValue N1 = N->getOperand(1); |
48423 | EVT VT = N->getValueType(0); |
48424 | SDLoc DL(N); |
48425 | |
48426 | |
48427 | if (!((VT == MVT::f32 && Subtarget.hasSSE1()) || |
48428 | (VT == MVT::f64 && Subtarget.hasSSE2()) || |
48429 | (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2()))) |
48430 | return SDValue(); |
48431 | |
48432 | auto isAllOnesConstantFP = [](SDValue V) { |
48433 | if (V.getSimpleValueType().isVector()) |
48434 | return ISD::isBuildVectorAllOnes(V.getNode()); |
48435 | auto *C = dyn_cast<ConstantFPSDNode>(V); |
48436 | return C && C->getConstantFPValue()->isAllOnesValue(); |
48437 | }; |
48438 | |
48439 | |
48440 | if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1))) |
48441 | return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1); |
48442 | |
48443 | |
48444 | if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1))) |
48445 | return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0); |
48446 | |
48447 | return SDValue(); |
48448 | } |
48449 | |
48450 | |
48451 | static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG, |
48452 | const X86Subtarget &Subtarget) { |
48453 | |
48454 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget)) |
48455 | return V; |
48456 | |
48457 | |
48458 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) |
48459 | return V; |
48460 | |
48461 | if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget)) |
48462 | return V; |
48463 | |
48464 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
48465 | } |
48466 | |
48467 | |
48468 | static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG, |
48469 | const X86Subtarget &Subtarget) { |
48470 | |
48471 | if (isNullFPScalarOrVectorConst(N->getOperand(0))) |
48472 | return N->getOperand(1); |
48473 | |
48474 | |
48475 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) |
48476 | return V; |
48477 | |
48478 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
48479 | } |
48480 | |
48481 | |
48482 | static SDValue combineFOr(SDNode *N, SelectionDAG &DAG, |
48483 | TargetLowering::DAGCombinerInfo &DCI, |
48484 | const X86Subtarget &Subtarget) { |
48485 | assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); |
48486 | |
48487 | |
48488 | if (isNullFPScalarOrVectorConst(N->getOperand(0))) |
48489 | return N->getOperand(1); |
48490 | |
48491 | |
48492 | if (isNullFPScalarOrVectorConst(N->getOperand(1))) |
48493 | return N->getOperand(0); |
48494 | |
48495 | if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget)) |
48496 | return NewVal; |
48497 | |
48498 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
48499 | } |
48500 | |
48501 | |
48502 | static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) { |
48503 | assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); |
48504 | |
48505 | |
48506 | if (!DAG.getTarget().Options.NoNaNsFPMath || |
48507 | !DAG.getTarget().Options.NoSignedZerosFPMath) |
48508 | return SDValue(); |
48509 | |
48510 | |
48511 | |
48512 | unsigned NewOp = 0; |
48513 | switch (N->getOpcode()) { |
48514 | default: llvm_unreachable("unknown opcode"); |
48515 | case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; |
48516 | case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; |
48517 | } |
48518 | |
48519 | return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), |
48520 | N->getOperand(0), N->getOperand(1)); |
48521 | } |
48522 | |
/// Lower ISD::FMINNUM/FMAXNUM to X86ISD::FMIN/FMAX, emulating the IEEE
/// "return the non-NaN operand" semantics where needed.
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  if (Subtarget.useSoftFloat())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Only types with native min/max instructions.
  EVT VT = N->getValueType(0);
  if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
        (Subtarget.hasSSE2() && VT == MVT::f64) ||
        (Subtarget.hasFP16() && VT == MVT::f16) ||
        (VT.isVector() && TLI.isTypeLegal(VT))))
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc DL(N);
  auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;

  // If no NaNs are possible, a bare FMIN/FMAX is already correct.
  if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
    return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());

  // x86 FMIN/FMAX return the SECOND operand when either input is NaN, so
  // place a possibly-NaN operand first (the known-non-NaN one second) to get
  // the FMINNUM/FMAXNUM behavior of returning the non-NaN operand.
  if (DAG.isKnownNeverNaN(Op1))
    return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
  if (DAG.isKnownNeverNaN(Op0))
    return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());

  // The general emulation below costs extra instructions; for scalar minsize
  // code a libcall/expansion is preferred.
  if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                         VT);

  // General case: compute min/max with the operands swapped (so a NaN in Op1
  // propagates as Op0 — second operand wins on NaN), then detect a NaN in
  // Op0 with an unordered self-compare and select Op1 in that case. Net
  // effect: if exactly one operand is NaN, the other is returned.
  SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
  SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO);

  // If Op0 is a NaN, select Op1 (which may itself be NaN — that matches
  // FMINNUM/FMAXNUM when both inputs are NaN).
  return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
48588 | |
/// DAG combine for X86 int-to-fp conversion nodes (vector forms).
static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Simplify away source elements that don't contribute to the result.
  APInt KnownUndef, KnownZero;
  APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
  if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
                                     KnownZero, DCI))
    return SDValue(N, 0);

  // When the conversion only reads the low elements of a 128-bit load,
  // narrow the full load to a zero-extending vector load (vzload) so we
  // don't read memory we don't need.
  SDValue In = N->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
    assert(InVT.is128BitVector() && "Expected 128-bit input vector");
    LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
    // Total bits actually consumed by the conversion.
    unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
    MVT MemVT = MVT::getIntegerVT(NumBits);
    MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
    if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
      SDLoc dl(N);
      SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
                                    DAG.getBitcast(InVT, VZLoad));
      DCI.CombineTo(N, Convert);
      // Re-route the old load's chain users to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
      DCI.recursivelyDeleteUnusedNodes(LN);
      return SDValue(N, 0);
    }
  }

  return SDValue();
}
48623 | |
/// DAG combine for X86 vector fp-to-int conversions (and their strict-FP
/// variants): narrow a full 128-bit load to a vzload when only the low
/// elements are converted.
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  bool IsStrict = N->isTargetStrictFPOpcode();
  EVT VT = N->getValueType(0);

  // Operand 0 of a strict node is the chain; the FP input follows it.
  SDValue In = N->getOperand(IsStrict ? 1 : 0);
  MVT InVT = In.getSimpleValueType();
  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
    assert(InVT.is128BitVector() && "Expected 128-bit input vector");
    LoadSDNode *LN = cast<LoadSDNode>(In);
    // Total bits actually consumed by the conversion.
    unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
    MVT MemVT = MVT::getFloatingPointVT(NumBits);
    MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
    if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
      SDLoc dl(N);
      if (IsStrict) {
        // Preserve the chain result for the strict form.
        SDValue Convert =
            DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},
                        {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
        DCI.CombineTo(N, Convert, Convert.getValue(1));
      } else {
        SDValue Convert =
            DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
        DCI.CombineTo(N, Convert);
      }
      // Re-route the old load's chain users to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
      DCI.recursivelyDeleteUnusedNodes(LN);
      return SDValue(N, 0);
    }
  }

  return SDValue();
}
48659 | |
48660 | |
48661 | static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, |
48662 | TargetLowering::DAGCombinerInfo &DCI, |
48663 | const X86Subtarget &Subtarget) { |
48664 | MVT VT = N->getSimpleValueType(0); |
48665 | |
48666 | |
48667 | if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) |
48668 | return N->getOperand(1); |
48669 | |
48670 | |
48671 | if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode())) |
48672 | return DAG.getConstant(0, SDLoc(N), VT); |
48673 | |
48674 | |
48675 | if (SDValue Not = IsNOT(N->getOperand(0), DAG)) |
48676 | return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), |
48677 | N->getOperand(1)); |
48678 | |
48679 | |
48680 | if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { |
48681 | SDValue Op(N, 0); |
48682 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
48683 | return Res; |
48684 | } |
48685 | |
48686 | return SDValue(); |
48687 | } |
48688 | |
48689 | static SDValue combineBT(SDNode *N, SelectionDAG &DAG, |
48690 | TargetLowering::DAGCombinerInfo &DCI) { |
48691 | SDValue N1 = N->getOperand(1); |
48692 | |
48693 | |
48694 | unsigned BitWidth = N1.getValueSizeInBits(); |
48695 | APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); |
48696 | if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(N1, DemandedMask, DCI)) { |
48697 | if (N->getOpcode() != ISD::DELETED_NODE) |
48698 | DCI.AddToWorklist(N); |
48699 | return SDValue(N, 0); |
48700 | } |
48701 | |
48702 | return SDValue(); |
48703 | } |
48704 | |
/// DAG combine for X86ISD::CVTPH2PS / STRICT_CVTPH2PS (half-to-float
/// conversion) nodes.
static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI) {
  bool IsStrict = N->getOpcode() == X86ISD::STRICT_CVTPH2PS;
  // Operand 0 of a strict node is the chain; the source follows it.
  SDValue Src = N->getOperand(IsStrict ? 1 : 0);

  if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {
    // Only the low 4 of the 8 i16 source elements are converted.
    APInt KnownUndef, KnownZero;
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    APInt DemandedElts = APInt::getLowBitsSet(8, 4);
    if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                       DCI)) {
      // Re-queue N unless the simplification deleted it.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    // Narrow a full 128-bit load to a 64-bit vzload: only 4 halves are read.
    if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
      LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(IsStrict ? 1 : 0));
      if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::i64, MVT::v2i64, DAG)) {
        SDLoc dl(N);
        if (IsStrict) {
          // Preserve the chain result for the strict form.
          SDValue Convert = DAG.getNode(
              N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
              {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)});
          DCI.CombineTo(N, Convert, Convert.getValue(1));
        } else {
          SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32,
                                        DAG.getBitcast(MVT::v8i16, VZLoad));
          DCI.CombineTo(N, Convert);
        }

        // Re-route the old load's chain users to the new load's chain.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return SDValue(N, 0);
      }
    }
  }

  return SDValue();
}
48746 | |
48747 | |
/// Fold sign_extend_inreg(cmov(C0, C1)) by sign-extending the constant cmov
/// operands instead, removing the extension from the cmov's result path.
static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT DstVT = N->getValueType(0);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();

  // Only i8/i16 in-register extensions are handled.
  if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16)
    return SDValue();

  // Look through a single-use any_extend or truncate between the
  // sign_extend_inreg and the cmov; it is re-applied to the constants below.
  SDValue IntermediateBitwidthOp;
  if ((N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
      N0.hasOneUse()) {
    IntermediateBitwidthOp = N0;
    N0 = N0.getOperand(0);
  }

  // Require a single-use X86 CMOV.
  if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse())
    return SDValue();

  SDValue CMovOp0 = N0.getOperand(0);
  SDValue CMovOp1 = N0.getOperand(1);

  // Both selected values must be constants so the extension folds away.
  if (!isa<ConstantSDNode>(CMovOp0.getNode()) ||
      !isa<ConstantSDNode>(CMovOp1.getNode()))
    return SDValue();

  SDLoc DL(N);

  // Re-apply the bitwidth change we looked through to each constant.
  if (IntermediateBitwidthOp) {
    unsigned IntermediateOpc = IntermediateBitwidthOp.getOpcode();
    CMovOp0 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp0);
    CMovOp1 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp1);
  }

  // Sign-extend the constants in place (constant-folds immediately).
  CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp0, N1);
  CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp1, N1);

  EVT CMovVT = DstVT;
  // Avoid i16 CMOVs (prefixed encodings); widen to i32 and truncate after.
  if (DstVT == MVT::i16) {
    CMovVT = MVT::i32;
    CMovOp0 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp0);
    CMovOp1 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp1);
  }

  // Rebuild the cmov with the same condition and flags operands.
  SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, CMovVT, CMovOp0, CMovOp1,
                             N0.getOperand(2), N0.getOperand(3));

  if (CMovVT != DstVT)
    CMov = DAG.getNode(ISD::TRUNCATE, DL, DstVT, CMov);

  return CMov;
}
48808 | |
/// DAG combine for ISD::SIGN_EXTEND_INREG.
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  if (SDValue V = combineSextInRegCmov(N, DAG))
    return V;

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
  SDLoc dl(N);

  // Handle sign_extend_inreg of a v4i64 that came from extending a v4i32
  // value: the extension-in-register can be done at v4i32 width and then
  // sign-extended, which is much cheaper pre-AVX2.
  if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
                           N0.getOpcode() == ISD::SIGN_EXTEND)) {
    SDValue N00 = N0.getOperand(0);

    // NOTE(review): with AVX2 a normal load can fold into the sign-extend
    // directly; this bails on non-normal (e.g. extending/indexed) loads —
    // presumably to avoid breaking that fold. Confirm intent against the
    // upstream comment that was stripped from this listing.
    if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256())
      if (!ISD::isNormalLoad(N00.getNode()))
        return SDValue();

    // First try promoting any mask arithmetic feeding the extend.
    if (SDValue Promote = PromoteMaskArithmetic(N0.getNode(), DAG, Subtarget))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1);

    // sext_inreg(v4i64 ext(v4i32 x)) -> sext(v4i32 sext_inreg(x)).
    if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
      SDValue Tmp =
          DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1);
      return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
    }
  }
  return SDValue();
}
48850 | |
48851 | |
48852 | |
48853 | |
48854 | |
48855 | |
48856 | static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, |
48857 | const X86Subtarget &Subtarget) { |
48858 | if (Ext->getOpcode() != ISD::SIGN_EXTEND && |
48859 | Ext->getOpcode() != ISD::ZERO_EXTEND) |
48860 | return SDValue(); |
48861 | |
48862 | |
48863 | EVT VT = Ext->getValueType(0); |
48864 | if (VT != MVT::i64) |
48865 | return SDValue(); |
48866 | |
48867 | SDValue Add = Ext->getOperand(0); |
48868 | if (Add.getOpcode() != ISD::ADD) |
48869 | return SDValue(); |
48870 | |
48871 | bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND; |
48872 | bool NSW = Add->getFlags().hasNoSignedWrap(); |
48873 | bool NUW = Add->getFlags().hasNoUnsignedWrap(); |
48874 | |
48875 | |
48876 | |
48877 | if ((Sext && !NSW) || (!Sext && !NUW)) |
48878 | return SDValue(); |
48879 | |
48880 | |
48881 | |
48882 | |
48883 | auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1)); |
48884 | if (!AddOp1) |
48885 | return SDValue(); |
48886 | |
48887 | |
48888 | |
48889 | |
48890 | |
48891 | |
48892 | bool HasLEAPotential = false; |
48893 | for (auto *User : Ext->uses()) { |
48894 | if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) { |
48895 | HasLEAPotential = true; |
48896 | break; |
48897 | } |
48898 | } |
48899 | if (!HasLEAPotential) |
48900 | return SDValue(); |
48901 | |
48902 | |
48903 | int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue(); |
48904 | SDValue AddOp0 = Add.getOperand(0); |
48905 | SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0); |
48906 | SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT); |
48907 | |
48908 | |
48909 | |
48910 | SDNodeFlags Flags; |
48911 | Flags.setNoSignedWrap(NSW); |
48912 | Flags.setNoUnsignedWrap(NUW); |
48913 | return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); |
48914 | } |
48915 | |
48916 | |
48917 | |
48918 | |
48919 | |
48920 | |
48921 | |
48922 | |
48923 | |
48924 | |
48925 | |
48926 | |
48927 | |
48928 | static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) { |
48929 | SDValue CMovN = Extend->getOperand(0); |
48930 | if (CMovN.getOpcode() != X86ISD::CMOV || !CMovN.hasOneUse()) |
48931 | return SDValue(); |
48932 | |
48933 | EVT TargetVT = Extend->getValueType(0); |
48934 | unsigned ExtendOpcode = Extend->getOpcode(); |
48935 | SDLoc DL(Extend); |
48936 | |
48937 | EVT VT = CMovN.getValueType(); |
48938 | SDValue CMovOp0 = CMovN.getOperand(0); |
48939 | SDValue CMovOp1 = CMovN.getOperand(1); |
48940 | |
48941 | if (!isa<ConstantSDNode>(CMovOp0.getNode()) || |
48942 | !isa<ConstantSDNode>(CMovOp1.getNode())) |
48943 | return SDValue(); |
48944 | |
48945 | |
48946 | if (TargetVT != MVT::i32 && TargetVT != MVT::i64) |
48947 | return SDValue(); |
48948 | |
48949 | |
48950 | |
48951 | if (VT != MVT::i16 && !(ExtendOpcode == ISD::SIGN_EXTEND && VT == MVT::i32)) |
48952 | return SDValue(); |
48953 | |
48954 | |
48955 | |
48956 | EVT ExtendVT = TargetVT; |
48957 | if (TargetVT == MVT::i64 && ExtendOpcode != ISD::SIGN_EXTEND) |
48958 | ExtendVT = MVT::i32; |
48959 | |
48960 | CMovOp0 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp0); |
48961 | CMovOp1 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp1); |
48962 | |
48963 | SDValue Res = DAG.getNode(X86ISD::CMOV, DL, ExtendVT, CMovOp0, CMovOp1, |
48964 | CMovN.getOperand(2), CMovN.getOperand(3)); |
48965 | |
48966 | |
48967 | if (ExtendVT != TargetVT) |
48968 | Res = DAG.getNode(ExtendOpcode, DL, TargetVT, Res); |
48969 | |
48970 | return Res; |
48971 | } |
48972 | |
48973 | |
48974 | |
/// Convert ext(vXi1 bitcast(iN scalar)) into a broadcast + bit-test sequence:
/// splat the scalar, mask each lane with its bit, compare, and adjust for the
/// extension kind. Used pre-AVX512 where there are no mask registers.
static SDValue
combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
      Opcode != ISD::ANY_EXTEND)
    return SDValue();
  if (!DCI.isBeforeLegalizeOps())
    return SDValue();
  // AVX512 has native mask extension; SSE2 is needed for the vector ops.
  if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT InSVT = N0.getValueType().getScalarType();
  unsigned EltSizeInBits = SVT.getSizeInBits();

  // Input must be vXi1 produced by bitcasting an integer scalar; output must
  // be a vector of standard integer elements.
  if (!VT.isVector())
    return SDValue();
  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
    return SDValue();
  if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  EVT SclVT = N0.getOperand(0).getValueType();
  if (!SclVT.isScalarInteger())
    return SDValue();

  SDLoc DL(N);
  SDValue Vec;
  SmallVector<int, 32> ShuffleMask;
  unsigned NumElts = VT.getVectorNumElements();
  assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");

  // Three broadcast strategies depending on how the scalar width relates to
  // the result element width:
  if (NumElts > EltSizeInBits) {
    // Scalar is wider than one result element: insert it as a SclVT element,
    // bitcast, then shuffle so each group of EltSizeInBits lanes sees the
    // chunk of the scalar that contains its bit.
    assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
    unsigned Scale = NumElts / EltSizeInBits;
    EVT BroadcastVT =
        EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
    Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
    Vec = DAG.getBitcast(VT, Vec);

    for (unsigned i = 0; i != Scale; ++i)
      ShuffleMask.append(EltSizeInBits, i);
    Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
  } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
             (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
    // Scalar is narrower than one result element: broadcast it at its own
    // width (AVX2 vpbroadcast) and bitcast; each result element then holds
    // Scale copies of the scalar, so every bit is present.
    assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
    unsigned Scale = EltSizeInBits / NumElts;
    EVT BroadcastVT =
        EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale);
    Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
    ShuffleMask.append(NumElts * Scale, 0);
    Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask);
    Vec = DAG.getBitcast(VT, Vec);
  } else {
    // Default: extend/truncate the scalar to the element type and splat it
    // to every lane with a zero shuffle mask.
    SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
    Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
    ShuffleMask.append(NumElts, 0);
    Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
  }

  // Isolate each lane's bit with a per-lane single-bit mask.
  SmallVector<SDValue, 32> Bits;
  for (unsigned i = 0; i != NumElts; ++i) {
    int BitIdx = (i % EltSizeInBits);
    APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1);
    Bits.push_back(DAG.getConstant(Bit, DL, SVT));
  }
  SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
  Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);

  // Compare against the bitmask: lanes whose bit was set become all-ones.
  EVT CCVT = VT.changeVectorElementType(MVT::i1);
  Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ);
  Vec = DAG.getSExtOrTrunc(Vec, DL, VT);

  // Sign-extend keeps the all-ones lanes; zext/aext shift the sign bit down
  // to produce 0/1 per lane.
  if (Opcode == ISD::SIGN_EXTEND)
    return Vec;
  return DAG.getNode(ISD::SRL, DL, VT, Vec,
                     DAG.getConstant(EltSizeInBits - 1, DL, VT));
}
49076 | |
49077 | |
49078 | |
/// Fold ext(setcc) into a wider setcc on AVX512, where vector compares can
/// produce a full-width result directly.
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // Requires AVX512 and a vector setcc input.
  if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC)
    return SDValue();

  // Result element type must be one the vector compares support.
  EVT SVT = VT.getVectorElementType();
  if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
      SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
    return SDValue();

  // f16 compares are handled separately.
  if (N0.getOperand(0).getValueType().getVectorElementType() == MVT::f16)
    return SDValue();

  // NOTE(review): bails on >256-bit results when 512-bit regs are in use —
  // presumably to avoid interfering with wide-compare lowering; the upstream
  // comment was stripped from this listing, confirm the rationale there.
  unsigned Size = VT.getSizeInBits();
  if (Size > 256 && Subtarget.useAVX512Regs())
    return SDValue();

  // Unsigned integer compares don't map onto this fold.
  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
  if (ISD::isUnsignedIntSetCC(CC))
    return SDValue();

  // The compared vectors must already have the result's total width so the
  // setcc can be re-issued directly at type VT.
  EVT N00VT = N0.getOperand(0).getValueType();
  EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
  if (Size != MatchingVecType.getSizeInBits())
    return SDValue();

  SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);

  // A zext wants 0/1 lanes, not the setcc's all-ones lanes.
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType());

  return Res;
}
49122 | |
/// DAG combine for ISD::SIGN_EXTEND.
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // sext(SETCC_CARRY) -> SETCC_CARRY at the wider type: the carry-based
  // setcc already produces an all-ones/all-zeros value.
  if (!DCI.isBeforeLegalizeOps() &&
      N0.getOpcode() == X86ISD::SETCC_CARRY) {
    SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
                                N0->getOperand(1));
    bool ReplaceOtherUses = !N0.hasOneUse();
    DCI.CombineTo(N, Setcc);

    // If the old SETCC_CARRY has other users, feed them a truncate of the
    // wide result instead of keeping two setcc nodes alive.
    if (ReplaceOtherUses) {
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), Setcc);
      DCI.CombineTo(N0.getNode(), Trunc);
    }

    return SDValue(N, 0);
  }

  if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
    return NewCMov;

  // Everything below only fires before operation legalization.
  if (!DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
    return V;

  if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
    return V;

  if (VT.isVector()) {
    if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
      return R;

    // sext(sext_inreg_vec(x)) -> sext_inreg_vec(x) at the wider type.
    if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
  }

  if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
    return NewAdd;

  return SDValue();
}
49172 | // Fold FMA-family nodes: expand when unprofitable, absorb operand negations. |
49173 | static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, |
49174 |                           TargetLowering::DAGCombinerInfo &DCI, |
49175 |                           const X86Subtarget &Subtarget) { |
49176 |   SDLoc dl(N); |
49177 |   EVT VT = N->getValueType(0); |
49178 |   bool IsStrict = N->isStrictFPOpcode() || N->isTargetStrictFPOpcode(); |
49179 |   // Only handle legal result types; illegal types will be split/promoted |
49180 |   // first and recombined later. |
49181 |   const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
49182 |   if (!TLI.isTypeLegal(VT)) |
49183 |     return SDValue(); |
49184 |   // Operand order: strict nodes carry the chain in operand 0. |
49185 |   SDValue A = N->getOperand(IsStrict ? 1 : 0); |
49186 |   SDValue B = N->getOperand(IsStrict ? 2 : 1); |
49187 |   SDValue C = N->getOperand(IsStrict ? 3 : 2); |
49188 | |
49189 |   // With reassociation allowed and FMA marked Expand for this type, emit |
49190 |   // fmul+fadd instead of the (expanded) fma. |
49191 |   SDNodeFlags Flags = N->getFlags(); |
49192 |   if (!IsStrict && Flags.hasAllowReassociation() && |
49193 |       TLI.isOperationExpand(ISD::FMA, VT)) { |
49194 |     SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags); |
49195 |     return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags); |
49196 |   } |
49197 |   // Bail if the target has no FMA instruction for this scalar type. |
49198 |   EVT ScalarVT = VT.getScalarType(); |
49199 |   if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || |
49200 |       !Subtarget.hasAnyFMA()) && |
49201 |       !(ScalarVT == MVT::f16 && Subtarget.hasFP16())) |
49202 |     return SDValue(); |
49203 |   // Replace V with its negation when that is cheaper to materialize. |
49204 |   auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { |
49205 |     bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); |
49206 |     bool LegalOperations = !DCI.isBeforeLegalizeOps(); |
49207 |     if (SDValue NegV = TLI.getCheaperNegatedExpression(V, DAG, LegalOperations, |
49208 |                                                        CodeSize)) { |
49209 |       V = NegV; |
49210 |       return true; |
49211 |     } |
49212 |     // Also look through extract_vector_elt of index 0: negate the source |
49213 |     // vector and re-extract the low element. |
49214 |     if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
49215 |         isNullConstant(V.getOperand(1))) { |
49216 |       SDValue Vec = V.getOperand(0); |
49217 |       if (SDValue NegV = TLI.getCheaperNegatedExpression( |
49218 |               Vec, DAG, LegalOperations, CodeSize)) { |
49219 |         V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), |
49220 |                         NegV, V.getOperand(1)); |
49221 |         return true; |
49222 |       } |
49223 |     } |
49224 | |
49225 |     return false; |
49226 |   }; |
49227 |   // Absorb cheaper negations of each operand, recording which flipped. |
49228 | |
49229 | |
49230 |   bool NegA = invertIfNegative(A); |
49231 |   bool NegB = invertIfNegative(B); |
49232 |   bool NegC = invertIfNegative(C); |
49233 | |
49234 |   if (!NegA && !NegB && !NegC) |
49235 |     return SDValue(); |
49236 |   // negateFMAOpcode picks the variant matching the flipped operand signs. |
49237 |   unsigned NewOpcode = |
49238 |       negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false); |
49239 | |
49240 |   // Propagate the original node flags to the rebuilt node. |
49241 |   SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); |
49242 |   if (IsStrict) { |
49243 |     assert(N->getNumOperands() == 4 && "Shouldn't be greater than 4"); |
49244 |     return DAG.getNode(NewOpcode, dl, {VT, MVT::Other}, |
49245 |                        {N->getOperand(0), A, B, C}); |
49246 |   } else { |
49247 |     if (N->getNumOperands() == 4) |
49248 |       return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); |
49249 |     return DAG.getNode(NewOpcode, dl, VT, A, B, C); |
49250 |   } |
49251 | } |
49252 | |
49253 | |
49254 | |
49255 | static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, |
49256 | TargetLowering::DAGCombinerInfo &DCI) { |
49257 | SDLoc dl(N); |
49258 | EVT VT = N->getValueType(0); |
49259 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
49260 | bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); |
49261 | bool LegalOperations = !DCI.isBeforeLegalizeOps(); |
49262 | |
49263 | SDValue N2 = N->getOperand(2); |
49264 | |
49265 | SDValue NegN2 = |
49266 | TLI.getCheaperNegatedExpression(N2, DAG, LegalOperations, CodeSize); |
49267 | if (!NegN2) |
49268 | return SDValue(); |
49269 | unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false); |
49270 | |
49271 | if (N->getNumOperands() == 4) |
49272 | return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), |
49273 | NegN2, N->getOperand(3)); |
49274 | return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), |
49275 | NegN2); |
49276 | } |
49277 | // Try to simplify ISD::ZERO_EXTEND / ISD::ANY_EXTEND nodes. |
49278 | static SDValue combineZext(SDNode *N, SelectionDAG &DAG, |
49279 |                            TargetLowering::DAGCombinerInfo &DCI, |
49280 |                            const X86Subtarget &Subtarget) { |
49281 |   SDLoc dl(N); |
49282 |   SDValue N0 = N->getOperand(0); |
49283 |   EVT VT = N->getValueType(0); |
49284 |   // After legalization, widen (any_extend (X86ISD::SETCC_CARRY)) by |
49285 |   // rebuilding the SETCC_CARRY directly in the wider result type. |
49286 | |
49287 |   if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND && |
49288 |       N0.getOpcode() == X86ISD::SETCC_CARRY) { |
49289 |     SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0), |
49290 |                                 N0->getOperand(1)); |
49291 |     bool ReplaceOtherUses = !N0.hasOneUse(); |
49292 |     DCI.CombineTo(N, Setcc); |
49293 |     // Other users of N0 get a truncate of the widened node instead. |
49294 |     if (ReplaceOtherUses) { |
49295 |       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), |
49296 |                                   N0.getValueType(), Setcc); |
49297 |       DCI.CombineTo(N0.getNode(), Trunc); |
49298 |     } |
49299 | |
49300 |     return SDValue(N, 0); |
49301 |   } |
49302 | |
49303 |   if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) |
49304 |     return NewCMov; |
49305 |   // Note: unlike combineSext, the setcc fold is tried pre-legalize only. |
49306 |   if (DCI.isBeforeLegalizeOps()) |
49307 |     if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) |
49308 |       return V; |
49309 | |
49310 |   if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) |
49311 |     return V; |
49312 | |
49313 |   if (VT.isVector()) |
49314 |     if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget)) |
49315 |       return R; |
49316 | |
49317 |   if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) |
49318 |     return NewAdd; |
49319 | |
49320 |   if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget)) |
49321 |     return R; |
49322 |   // zext(128-bit PACKUS(x, y)) -> concat(x, y) when the upper half of each |
49323 |   // source element is known zero (the pack then loses no information). |
49324 |   if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 && |
49325 |       VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) { |
49326 |     SDValue N00 = N0.getOperand(0); |
49327 |     SDValue N01 = N0.getOperand(1); |
49328 |     unsigned NumSrcEltBits = N00.getScalarValueSizeInBits(); |
49329 |     APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2); |
49330 |     if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) && |
49331 |         (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) { |
49332 |       return concatSubVectors(N00, N01, DAG, dl); |
49333 |     } |
49334 |   } |
49335 | |
49336 |   return SDValue(); |
49337 | } |
49338 | |
49339 | |
49340 | |
49341 | static bool isOrXorXorTree(SDValue X, bool Root = true) { |
49342 | if (X.getOpcode() == ISD::OR) |
49343 | return isOrXorXorTree(X.getOperand(0), false) && |
49344 | isOrXorXorTree(X.getOperand(1), false); |
49345 | if (Root) |
49346 | return false; |
49347 | return X.getOpcode() == ISD::XOR; |
49348 | } |
49349 | |
49350 | |
49351 | |
49352 | template<typename F> |
49353 | static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG, |
49354 | EVT VecVT, EVT CmpVT, bool HasPT, F SToV) { |
49355 | SDValue Op0 = X.getOperand(0); |
49356 | SDValue Op1 = X.getOperand(1); |
49357 | if (X.getOpcode() == ISD::OR) { |
49358 | SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV); |
49359 | SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV); |
49360 | if (VecVT != CmpVT) |
49361 | return DAG.getNode(ISD::OR, DL, CmpVT, A, B); |
49362 | if (HasPT) |
49363 | return DAG.getNode(ISD::OR, DL, VecVT, A, B); |
49364 | return DAG.getNode(ISD::AND, DL, CmpVT, A, B); |
49365 | } else if (X.getOpcode() == ISD::XOR) { |
49366 | SDValue A = SToV(Op0); |
49367 | SDValue B = SToV(Op1); |
49368 | if (VecVT != CmpVT) |
49369 | return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); |
49370 | if (HasPT) |
49371 | return DAG.getNode(ISD::XOR, DL, VecVT, A, B); |
49372 | return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); |
49373 | } |
49374 | llvm_unreachable("Impossible"); |
49375 | } |
49376 | |
49377 | // Try to map a wide (>=128-bit) scalar integer equality comparison to a |
49378 | // vector compare + PTEST/MOVMSK/KORTEST sequence. |
49379 | static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, |
49380 |                                                const X86Subtarget &Subtarget) { |
49381 |   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); |
49382 |   assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); |
49383 | |
49384 |   // Only wide scalar integer operands are interesting here. |
49385 |   SDValue X = SetCC->getOperand(0); |
49386 |   SDValue Y = SetCC->getOperand(1); |
49387 |   EVT OpVT = X.getValueType(); |
49388 |   unsigned OpSize = OpVT.getSizeInBits(); |
49389 |   if (!OpVT.isScalarInteger() || OpSize < 128) |
49390 |     return SDValue(); |
49391 | |
49392 |   // A comparison against zero is only handled when the LHS is an |
49393 |   // OR-of-XORs tree (the memcmp-style "or (xor a b) (xor c d)" shape); in |
49394 |   // that case each XOR leaf can become its own vector compare. A plain |
49395 |   // wide compare against zero is left for other combines. |
49396 | |
49397 |   bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); |
49398 |   if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) |
49399 |     return SDValue(); |
49400 |   // Don't do this if moving the operands into vector registers would be |
49401 |   // expensive (only constants, existing vectors and loads are cheap). |
49402 |   auto IsVectorBitCastCheap = [](SDValue X) { |
49403 |     X = peekThroughBitcasts(X); |
49404 |     return isa<ConstantSDNode>(X) || X.getValueType().isVector() || |
49405 |            X.getOpcode() == ISD::LOAD; |
49406 |   }; |
49407 |   if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) && |
49408 |       !IsOrXorXorTreeCCZero) |
49409 |     return SDValue(); |
49410 | |
49411 |   EVT VT = SetCC->getValueType(0); |
49412 |   SDLoc DL(SetCC); |
49413 | |
49414 |   // The operand width must be natively supported: 128-bit needs SSE2, |
49415 |   // 256-bit needs AVX, 512-bit needs usable AVX-512 (512-bit registers |
49416 |   // enabled). |
49417 |   if ((OpSize == 128 && Subtarget.hasSSE2()) || |
49418 |       (OpSize == 256 && Subtarget.hasAVX()) || |
49419 |       (OpSize == 512 && Subtarget.useAVX512Regs())) { |
49420 |     bool HasPT = Subtarget.hasSSE41(); |
49421 |     // When mask registers are preferred (PreferKOT) the compare result is |
49422 |     // a vXi1 mask tested via a k-register. Without VLX the compare must be |
49423 |     // done in a 512-bit vector, so sub-512-bit inputs get zero-inserted |
49424 |     // into a wider vector first (NeedZExt). |
49425 |     bool PreferKOT = Subtarget.preferMaskRegisters(); |
49426 |     bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512; |
49427 |     // Default: byte vectors of the operand width. |
49428 |     EVT VecVT = MVT::v16i8; |
49429 |     EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT; |
49430 |     if (OpSize == 256) { |
49431 |       VecVT = MVT::v32i8; |
49432 |       CmpVT = PreferKOT ? MVT::v32i1 : VecVT; |
49433 |     } |
49434 |     EVT CastVT = VecVT; |
49435 |     bool NeedsAVX512FCast = false; |
49436 |     if (OpSize == 512 || NeedZExt) { |
49437 |       if (Subtarget.hasBWI()) { |
49438 |         VecVT = MVT::v64i8; |
49439 |         CmpVT = MVT::v64i1; |
49440 |         if (OpSize == 512) |
49441 |           CastVT = VecVT; |
49442 |       } else { |
49443 |         VecVT = MVT::v16i32; |
49444 |         CmpVT = MVT::v16i1; |
49445 |         CastVT = OpSize == 512 ? VecVT : |
49446 |                  OpSize == 256 ? MVT::v8i32 : MVT::v4i32; |
49447 |         NeedsAVX512FCast = true; |
49448 |       } |
49449 |     } |
49450 |     // Bitcast a scalar operand into the compare vector type, widening by |
49451 |     auto ScalarToVector = [&](SDValue X) -> SDValue { |
49452 |       bool TmpZext = false; |
49453 |       EVT TmpCastVT = CastVT; |
49454 |       if (X.getOpcode() == ISD::ZERO_EXTEND) { |
49455 |         SDValue OrigX = X.getOperand(0); |
49456 |         unsigned OrigSize = OrigX.getScalarValueSizeInBits(); |
49457 |         if (OrigSize < OpSize) { |
49458 |           if (OrigSize == 128) { |
49459 |             TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8; |
49460 |             X = OrigX; |
49461 |             TmpZext = true; |
49462 |           } else if (OrigSize == 256) { |
49463 |             TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8; |
49464 |             X = OrigX; |
49465 |             TmpZext = true; |
49466 |           } |
49467 |         } |
49468 |       } |
49469 |       X = DAG.getBitcast(TmpCastVT, X); |
49470 |       if (!NeedZExt && !TmpZext) |
49471 |         return X; |
49472 |       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, |
49473 |                          DAG.getConstant(0, DL, VecVT), X, |
49474 |                          DAG.getVectorIdxConstant(0, DL)); |
49475 |     }; |
49476 |     // Build the comparison value. |
49477 |     SDValue Cmp; |
49478 |     if (IsOrXorXorTreeCCZero) { |
49479 |       // Compare each XOR leaf in vector form and combine the per-leaf |
49480 |       // results with OR (NE/PTEST forms) or AND (EQ form) -- see |
49481 |       // emitOrXorXorTree. This avoids materializing the wide scalar OR |
49482 |       // chain. |
49483 |       Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector); |
49484 |     } else { |
49485 |       SDValue VecX = ScalarToVector(X); |
49486 |       SDValue VecY = ScalarToVector(Y); |
49487 |       if (VecVT != CmpVT) { |
49488 |         Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE); |
49489 |       } else if (HasPT) { |
49490 |         Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY); |
49491 |       } else { |
49492 |         Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); |
49493 |       } |
49494 |     } |
49495 |     // vXi1 mask result: bitcast the mask to a scalar and compare with 0. |
49496 |     if (VecVT != CmpVT) { |
49497 |       EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 : |
49498 |                    CmpVT == MVT::v32i1 ? MVT::i32 : MVT::i16; |
49499 |       return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp), |
49500 |                           DAG.getConstant(0, DL, KRegVT), CC); |
49501 |     } |
49502 |     if (HasPT) { |
49503 |       SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64, |
49504 |                                      Cmp); |
49505 |       SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp); |
49506 |       X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE; |
49507 |       SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG); |
49508 |       return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0)); |
49509 |     } |
49510 | |
49511 |     // Pre-SSE41 fallback: PMOVMSKB the byte-equality mask and check for |
49512 |     // all-ones (operands equal in every byte iff the mask is 0xFFFF). |
49513 |     assert(Cmp.getValueType() == MVT::v16i8 && |
49514 |            "Non 128-bit vector on pre-SSE41 target"); |
49515 |     SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp); |
49516 |     SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32); |
49517 |     return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC); |
49518 |   } |
49519 | |
49520 |   return SDValue(); |
49521 | } |
49522 | // Combine ISD::SETCC nodes (scalar and vector). |
49523 | static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, |
49524 |                             TargetLowering::DAGCombinerInfo &DCI, |
49525 |                             const X86Subtarget &Subtarget) { |
49526 |   const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); |
49527 |   const SDValue LHS = N->getOperand(0); |
49528 |   const SDValue RHS = N->getOperand(1); |
49529 |   EVT VT = N->getValueType(0); |
49530 |   EVT OpVT = LHS.getValueType(); |
49531 |   SDLoc DL(N); |
49532 |   // Equality-only folds. |
49533 |   if (CC == ISD::SETNE || CC == ISD::SETEQ) { |
49534 |     if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget)) |
49535 |       return V; |
49536 |     // (i1 (setcc vec-reduction, 0)) -> X86ISD::SETCC of an all-zero test. |
49537 |     if (VT == MVT::i1 && isNullConstant(RHS)) { |
49538 |       SDValue X86CC; |
49539 |       if (SDValue V = |
49540 |               MatchVectorAllZeroTest(LHS, CC, DL, Subtarget, DAG, X86CC)) |
49541 |         return DAG.getNode(ISD::TRUNCATE, DL, VT, |
49542 |                            DAG.getNode(X86ISD::SETCC, DL, MVT::i8, X86CC, V)); |
49543 |     } |
49544 | |
49545 |     if (OpVT.isScalarInteger()) { |
49546 |       // cmpeq(or(X,Y), X) --> cmpeq(and(~X,Y), 0) |
49547 |       // cmpne(or(X,Y), X) --> cmpne(and(~X,Y), 0) |
49548 |       auto MatchOrCmpEq = [&](SDValue N0, SDValue N1) { |
49549 |         if (N0.getOpcode() == ISD::OR && N0->hasOneUse()) { |
49550 |           if (N0.getOperand(0) == N1) |
49551 |             return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT), |
49552 |                                N0.getOperand(1)); |
49553 |           if (N0.getOperand(1) == N1) |
49554 |             return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT), |
49555 |                                N0.getOperand(0)); |
49556 |         } |
49557 |         return SDValue(); |
49558 |       }; |
49559 |       if (SDValue AndN = MatchOrCmpEq(LHS, RHS)) |
49560 |         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
49561 |       if (SDValue AndN = MatchOrCmpEq(RHS, LHS)) |
49562 |         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
49563 | |
49564 |       // cmpeq(and(X,Y), Y) --> cmpeq(and(~X,Y), 0) |
49565 |       // cmpne(and(X,Y), Y) --> cmpne(and(~X,Y), 0) |
49566 |       auto MatchAndCmpEq = [&](SDValue N0, SDValue N1) { |
49567 |         if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) { |
49568 |           if (N0.getOperand(0) == N1) |
49569 |             return DAG.getNode(ISD::AND, DL, OpVT, N1, |
49570 |                                DAG.getNOT(DL, N0.getOperand(1), OpVT)); |
49571 |           if (N0.getOperand(1) == N1) |
49572 |             return DAG.getNode(ISD::AND, DL, OpVT, N1, |
49573 |                                DAG.getNOT(DL, N0.getOperand(0), OpVT)); |
49574 |         } |
49575 |         return SDValue(); |
49576 |       }; |
49577 |       if (SDValue AndN = MatchAndCmpEq(LHS, RHS)) |
49578 |         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
49579 |       if (SDValue AndN = MatchAndCmpEq(RHS, LHS)) |
49580 |         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
49581 | |
49582 |       // cmpeq(trunc(x), 0) --> cmpeq(x, 0) |
49583 |       // cmpne(trunc(x), 0) --> cmpne(x, 0) |
49584 |       // Valid when the bits dropped by the truncate are known zero and the |
49585 |       // wider source type is legal. |
49586 |       if (LHS.getOpcode() == ISD::TRUNCATE && |
49587 |           LHS.getOperand(0).getScalarValueSizeInBits() >= 32 && |
49588 |           isNullConstant(RHS) && !DCI.isBeforeLegalize()) { |
49589 |         EVT SrcVT = LHS.getOperand(0).getValueType(); |
49590 |         APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(), |
49591 |                                                 OpVT.getScalarSizeInBits()); |
49592 |         const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
49593 |         if (DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) && |
49594 |             TLI.isTypeLegal(LHS.getOperand(0).getValueType())) |
49595 |           return DAG.getSetCC(DL, VT, LHS.getOperand(0), |
49596 |                               DAG.getConstant(0, DL, SrcVT), CC); |
49597 |       } |
49598 |     } |
49599 |   } |
49600 |   // Fold vXi1 compares of (sext vXi1) against zero into boolean logic. |
49601 |   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
49602 |       (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) { |
49603 |     // Work on copies so a build_vector operand can be canonicalized to |
49604 |     // the RHS, swapping the condition accordingly. |
49605 |     SDValue Op0 = LHS; |
49606 |     SDValue Op1 = RHS; |
49607 |     ISD::CondCode TmpCC = CC; |
49608 | |
49609 |     if (Op0.getOpcode() == ISD::BUILD_VECTOR) { |
49610 |       std::swap(Op0, Op1); |
49611 |       TmpCC = ISD::getSetCCSwappedOperands(TmpCC); |
49612 |     } |
49613 |     // Match (setcc (sext vXi1), all-zeros): each lane is 0 or -1. |
49614 |     bool IsSEXT0 = |
49615 |         (Op0.getOpcode() == ISD::SIGN_EXTEND) && |
49616 |         (Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1); |
49617 |     bool IsVZero1 = ISD::isBuildVectorAllZeros(Op1.getNode()); |
49618 | |
49619 |     if (IsSEXT0 && IsVZero1) { |
49620 |       assert(VT == Op0.getOperand(0).getValueType() && |
49621 |              "Unexpected operand type"); |
49622 |       if (TmpCC == ISD::SETGT) |
49623 |         return DAG.getConstant(0, DL, VT); |
49624 |       if (TmpCC == ISD::SETLE) |
49625 |         return DAG.getConstant(1, DL, VT); |
49626 |       if (TmpCC == ISD::SETEQ || TmpCC == ISD::SETGE) |
49627 |         return DAG.getNOT(DL, Op0.getOperand(0), VT); |
49628 |       // Remaining predicates are true exactly on the -1 (set) lanes. |
49629 |       assert((TmpCC == ISD::SETNE || TmpCC == ISD::SETLT) && |
49630 |              "Unexpected condition code!"); |
49631 |       return Op0.getOperand(0); |
49632 |     } |
49633 |   } |
49634 | |
49635 |   // With AVX-512 but without BWI there are no native i8/i16 vector |
49636 |   // compares into mask registers. Perform the compare in the operand |
49637 |   // element type and truncate the result down to the vXi1 mask type, so |
49638 |   // legalization doesn't scalarize it. |
49639 | |
49640 |   if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() && |
49641 |       VT.getVectorElementType() == MVT::i1 && |
49642 |       (OpVT.getVectorElementType() == MVT::i8 || |
49643 |        OpVT.getVectorElementType() == MVT::i16)) { |
49644 |     SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC); |
49645 |     return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc); |
49646 |   } |
49647 | |
49648 |   // SSE1-only targets: custom-lower v4f32 compares rather than letting |
49649 |   // them scalarize. |
49650 |   if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 && |
49651 |       LHS.getValueType() == MVT::v4f32) |
49652 |     return LowerVSETCC(SDValue(N, 0), Subtarget, DAG); |
49653 | |
49654 |   return SDValue(); |
49655 | } |
49656 | // Simplify X86ISD::MOVMSK nodes (sign-bit extraction of each element). |
49657 | static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, |
49658 |                              TargetLowering::DAGCombinerInfo &DCI, |
49659 |                              const X86Subtarget &Subtarget) { |
49660 |   SDValue Src = N->getOperand(0); |
49661 |   MVT SrcVT = Src.getSimpleValueType(); |
49662 |   MVT VT = N->getSimpleValueType(0); |
49663 |   unsigned NumBits = VT.getScalarSizeInBits(); |
49664 |   unsigned NumElts = SrcVT.getVectorNumElements(); |
49665 |   // Constant-fold movmsk(constant vector): collect each defined element's |
49666 |   // sign bit into the result immediate. |
49667 |   if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) { |
49668 |     assert(VT == MVT::i32 && "Unexpected result type"); |
49669 |     APInt Imm(32, 0); |
49670 |     for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) { |
49671 |       if (!Src.getOperand(Idx).isUndef() && |
49672 |           Src.getConstantOperandAPInt(Idx).isNegative()) |
49673 |         Imm.setBit(Idx); |
49674 |     } |
49675 |     return DAG.getConstant(Imm, SDLoc(N), VT); |
49676 |   } |
49677 |   // Look through a bitcast that keeps the element width -- the sign bits |
49678 |   // are unchanged. |
49679 |   unsigned EltWidth = SrcVT.getScalarSizeInBits(); |
49680 |   if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST && |
49681 |       Src.getOperand(0).getScalarValueSizeInBits() == EltWidth) |
49682 |     return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0)); |
49683 |   // movmsk(not(x)) -> not(movmsk(x)): XOR the result with the low-NumElts |
49684 |   // mask instead of inverting the whole vector. |
49685 | |
49686 |   if (SDValue NotSrc = IsNOT(Src, DAG)) { |
49687 |     SDLoc DL(N); |
49688 |     APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts); |
49689 |     NotSrc = DAG.getBitcast(SrcVT, NotSrc); |
49690 |     return DAG.getNode(ISD::XOR, DL, VT, |
49691 |                        DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc), |
49692 |                        DAG.getConstant(NotMask, DL, VT)); |
49693 |   } |
49694 |   // movmsk(pcmpgt(x, -1)) -> not(movmsk(x)): "x > -1" is exactly the |
49695 |   // inverse of x's sign bit, so invert the mask bits instead of doing the |
49696 |   // compare. |
49697 |   if (Src.getOpcode() == X86ISD::PCMPGT && |
49698 |       ISD::isBuildVectorAllOnes(Src.getOperand(1).getNode())) { |
49699 |     SDLoc DL(N); |
49700 |     APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts); |
49701 |     return DAG.getNode(ISD::XOR, DL, VT, |
49702 |                        DAG.getNode(X86ISD::MOVMSK, DL, VT, Src.getOperand(0)), |
49703 |                        DAG.getConstant(NotMask, DL, VT)); |
49704 |   } |
49705 |   // Finally, try to simplify via the generic demanded-bits machinery |
49706 |   // (only the low NumElts bits of the result can be non-zero). |
49707 |   const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
49708 |   APInt DemandedMask(APInt::getAllOnesValue(NumBits)); |
49709 |   if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) |
49710 |     return SDValue(N, 0); |
49711 | |
49712 |   return SDValue(); |
49713 | } |
49714 | |
49715 | static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, |
49716 | TargetLowering::DAGCombinerInfo &DCI) { |
49717 | |
49718 | SDValue Mask = cast<X86MaskedGatherScatterSDNode>(N)->getMask(); |
49719 | if (Mask.getScalarValueSizeInBits() != 1) { |
49720 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
49721 | APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); |
49722 | if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) { |
49723 | if (N->getOpcode() != ISD::DELETED_NODE) |
49724 | DCI.AddToWorklist(N); |
49725 | return SDValue(N, 0); |
49726 | } |
49727 | } |
49728 | |
49729 | return SDValue(); |
49730 | } |
49731 | |
49732 | static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS, |
49733 | SDValue Index, SDValue Base, SDValue Scale, |
49734 | SelectionDAG &DAG) { |
49735 | SDLoc DL(GorS); |
49736 | |
49737 | if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) { |
49738 | SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(), |
49739 | Gather->getMask(), Base, Index, Scale } ; |
49740 | return DAG.getMaskedGather(Gather->getVTList(), |
49741 | Gather->getMemoryVT(), DL, Ops, |
49742 | Gather->getMemOperand(), |
49743 | Gather->getIndexType(), |
49744 | Gather->getExtensionType()); |
49745 | } |
49746 | auto *Scatter = cast<MaskedScatterSDNode>(GorS); |
49747 | SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(), |
49748 | Scatter->getMask(), Base, Index, Scale }; |
49749 | return DAG.getMaskedScatter(Scatter->getVTList(), |
49750 | Scatter->getMemoryVT(), DL, |
49751 | Ops, Scatter->getMemOperand(), |
49752 | Scatter->getIndexType(), |
49753 | Scatter->isTruncatingStore()); |
49754 | } |
49755 | // Simplify the index and mask operands of a masked gather/scatter node. |
49756 | static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, |
49757 |                                     TargetLowering::DAGCombinerInfo &DCI) { |
49758 |   SDLoc DL(N); |
49759 |   auto *GorS = cast<MaskedGatherScatterSDNode>(N); |
49760 |   SDValue Index = GorS->getIndex(); |
49761 |   SDValue Base = GorS->getBasePtr(); |
49762 |   SDValue Scale = GorS->getScale(); |
49763 | |
49764 |   if (DCI.isBeforeLegalize()) { |
49765 |     unsigned IndexWidth = Index.getScalarValueSizeInBits(); |
49766 |     // Shrink a constant index wider than 32 bits down to i32 when every |
49767 |     // element fits: more than (IndexWidth - 32) sign bits means the top |
49768 |     // half is pure sign-extension and carries no information. Narrower |
49769 |     // indices let twice as many elements fit in a vector register. |
49770 | |
49771 | |
49772 | |
49773 | |
49774 |     if (auto *BV = dyn_cast<BuildVectorSDNode>(Index)) { |
49775 |       if (BV->isConstant() && IndexWidth > 32 && |
49776 |           DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { |
49777 |         EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32); |
49778 |         Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); |
49779 |         return rebuildGatherScatter(GorS, Index, Base, Scale, DAG); |
49780 |       } |
49781 |     } |
49782 |     // Likewise, peek through a sext/zext from a <=32-bit source: if the |
49783 |     // extended value is all sign bits above bit 31, truncating back to |
49784 |     // i32 loses nothing. |
49785 | |
49786 |     if ((Index.getOpcode() == ISD::SIGN_EXTEND || |
49787 |          Index.getOpcode() == ISD::ZERO_EXTEND) && |
49788 |         IndexWidth > 32 && |
49789 |         Index.getOperand(0).getScalarValueSizeInBits() <= 32 && |
49790 |         DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { |
49791 |       EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32); |
49792 |       Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); |
49793 |       return rebuildGatherScatter(GorS, Index, Base, Scale, DAG); |
49794 |     } |
49795 |   } |
49796 | |
49797 |   if (DCI.isBeforeLegalizeOps()) { |
49798 |     unsigned IndexWidth = Index.getScalarValueSizeInBits(); |
49799 |     // Normalize odd index widths to the nearest of i32/i64 ourselves |
49800 |     // (sext-or-trunc) rather than letting type legalization choose. |
49801 |     if (IndexWidth != 32 && IndexWidth != 64) { |
49802 |       MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32; |
49803 |       EVT IndexVT = Index.getValueType().changeVectorElementType(EltVT); |
49804 |       Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); |
49805 |       return rebuildGatherScatter(GorS, Index, Base, Scale, DAG); |
49806 |     } |
49807 |   } |
49808 |   // With a wide (non-i1) mask only the sign bit of each element matters; |
49809 |   // let the demanded-bits machinery simplify the rest away. |
49810 |   SDValue Mask = GorS->getMask(); |
49811 |   if (Mask.getScalarValueSizeInBits() != 1) { |
49812 |     const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
49813 |     APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); |
49814 |     if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) { |
49815 |       if (N->getOpcode() != ISD::DELETED_NODE) |
49816 |         DCI.AddToWorklist(N); |
49817 |       return SDValue(N, 0); |
49818 |     } |
49819 |   } |
49820 | |
49821 |   return SDValue(); |
49822 | } |
49823 | |
49824 | |
49825 | static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, |
49826 | const X86Subtarget &Subtarget) { |
49827 | SDLoc DL(N); |
49828 | X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); |
49829 | SDValue EFLAGS = N->getOperand(1); |
49830 | |
49831 | |
49832 | if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) |
49833 | return getSETCC(CC, Flags, DL, DAG); |
49834 | |
49835 | return SDValue(); |
49836 | } |
49837 | |
49838 | |
49839 | static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, |
49840 | const X86Subtarget &Subtarget) { |
49841 | SDLoc DL(N); |
49842 | SDValue EFLAGS = N->getOperand(3); |
49843 | X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); |
49844 | |
49845 | |
49846 | |
49847 | |
49848 | if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) { |
49849 | SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8); |
49850 | return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), |
49851 | N->getOperand(1), Cond, Flags); |
49852 | } |
49853 | |
49854 | return SDValue(); |
49855 | } |
49856 | |
49857 | |
49858 | static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, |
49859 |                                                   SelectionDAG &DAG) { |
49860 |   // Fold a unary FP op applied to (and (vector-compare result), constant): |
49861 |   //   UNARYOP(AND(VECTOR_CMP(x, y), constant)) |
49862 |   //     --> AND(VECTOR_CMP(x, y), constant2) |
49863 |   // where constant2 = UNARYOP(constant), computed at compile time. |
49864 |   // |
49865 |   // The compare result is all-ones or all-zero per lane (checked below via |
49866 |   // ComputeNumSignBits == element width), so each lane of the AND is |
49867 |   // either 0 or the constant -- applying the op to those two values can be |
49868 |   // precomputed and the AND re-applied on the integer side. |
49869 |   // Handles both normal and strict FP opcodes (strict nodes carry a chain |
49870 |   // in operand 0). |
49871 |   EVT VT = N->getValueType(0); |
49872 |   bool IsStrict = N->isStrictFPOpcode(); |
49873 |   unsigned NumEltBits = VT.getScalarSizeInBits(); |
49874 |   SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); |
49875 |   if (!VT.isVector() || Op0.getOpcode() != ISD::AND || |
49876 |       DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits || |
49877 |       VT.getSizeInBits() != Op0.getValueSizeInBits()) |
49878 |     return SDValue(); |
49879 | |
49880 |   // Now check that the AND's other operand is a constant vector. In |
49881 |   // principle any constant would do, but only constant build_vectors are |
49882 |   // handled here. |
49883 | |
49884 |   if (auto *BV = dyn_cast<BuildVectorSDNode>(Op0.getOperand(1))) { |
49885 |     // Bail on non-constant build_vectors. |
49886 |     if (!BV->isConstant()) |
49887 |       return SDValue(); |
49888 |     // IntVT is the integer type the AND currently operates in; the final |
49889 |     // result is bitcast back to VT. |
49890 |     SDLoc DL(N); |
49891 |     EVT IntVT = BV->getValueType(0); |
49892 |     // Apply the unary op to the constant; this folds to a constant (and |
49893 |     // for strict ops carries the incoming chain through). |
49894 |     SDValue SourceConst; |
49895 |     if (IsStrict) |
49896 |       SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other}, |
49897 |                                 {N->getOperand(0), SDValue(BV, 0)}); |
49898 |     else |
49899 |       SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); |
49900 |     // Re-apply the mask on the integer side and bitcast back. |
49901 |     SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst); |
49902 |     SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0), |
49903 |                                  MaskConst); |
49904 |     SDValue Res = DAG.getBitcast(VT, NewAnd); |
49905 |     if (IsStrict) |
49906 |       return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL); |
49907 |     return Res; |
49908 |   } |
49909 | |
49910 |   return SDValue(); |
49911 | } |
49912 | |
49913 | |
49914 | |
49915 | |
49916 | static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) { |
49917 | |
49918 | |
49919 | |
49920 | SDValue Trunc = N->getOperand(0); |
49921 | if (!Trunc.hasOneUse() || Trunc.getOpcode() != ISD::TRUNCATE) |
49922 | return SDValue(); |
49923 | |
49924 | SDValue ExtElt = Trunc.getOperand(0); |
49925 | if (!ExtElt.hasOneUse() || ExtElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
49926 | !isNullConstant(ExtElt.getOperand(1))) |
49927 | return SDValue(); |
49928 | |
49929 | EVT TruncVT = Trunc.getValueType(); |
49930 | EVT SrcVT = ExtElt.getValueType(); |
49931 | unsigned DestWidth = TruncVT.getSizeInBits(); |
49932 | unsigned SrcWidth = SrcVT.getSizeInBits(); |
49933 | if (SrcWidth % DestWidth != 0) |
49934 | return SDValue(); |
49935 | |
49936 | |
49937 | EVT SrcVecVT = ExtElt.getOperand(0).getValueType(); |
49938 | unsigned VecWidth = SrcVecVT.getSizeInBits(); |
49939 | unsigned NumElts = VecWidth / DestWidth; |
49940 | EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, NumElts); |
49941 | SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0)); |
49942 | SDLoc DL(N); |
49943 | SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT, |
49944 | BitcastVec, ExtElt.getOperand(1)); |
49945 | return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt); |
49946 | } |
49947 | // Combine [STRICT_]UINT_TO_FP nodes. |
49948 | static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, |
49949 |                                const X86Subtarget &Subtarget) { |
49950 |   bool IsStrict = N->isStrictFPOpcode(); |
49951 |   SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); |
49952 |   EVT VT = N->getValueType(0); |
49953 |   EVT InVT = Op0.getValueType(); |
49954 |   // f16 results: for odd source widths (not 16/32/>=64 bits), zero-extend |
49955 |   // the input to the next i16/i32/i64 vector and convert that instead; |
49956 |   // source sizes with a direct lowering are skipped. |
49957 | |
49958 |   if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) { |
49959 |     unsigned ScalarSize = InVT.getScalarSizeInBits(); |
49960 |     if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64) |
49961 |       return SDValue(); |
49962 |     SDLoc dl(N); |
49963 |     EVT DstVT = EVT::getVectorVT(*DAG.getContext(), |
49964 |                                  ScalarSize < 16 ? MVT::i16 |
49965 |                                  : ScalarSize < 32 ? MVT::i32 |
49966 |                                  : MVT::i64, |
49967 |                                  InVT.getVectorNumElements()); |
49968 |     SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); |
49969 |     if (IsStrict) |
49970 |       return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other}, |
49971 |                          {N->getOperand(0), P}); |
49972 |     return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P); |
49973 |   } |
49974 |   // Sub-32-bit vector sources: zero-extend to i32 and use a *signed* |
49975 |   // conversion -- the zero-extended value is always non-negative, so the |
49976 |   // cheaper SINT_TO_FP gives the same result. |
49977 | |
49978 |   if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 && |
49979 |       VT.getScalarType() != MVT::f16) { |
49980 |     SDLoc dl(N); |
49981 |     EVT DstVT = InVT.changeVectorElementType(MVT::i32); |
49982 |     SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); |
49983 |     // The extended value has a clear sign bit, so signed convert is safe |
49984 |     // here. |
49985 |     if (IsStrict) |
49986 |       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, |
49987 |                          {N->getOperand(0), P}); |
49988 |     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); |
49989 |   } |
49990 |   // More generally: when the sign bit of the input is known zero, |
49991 |   // unsigned and signed conversion agree, and signed is the cheaper |
49992 |   // lowering on x86. |
49993 | |
49994 |   if (DAG.SignBitIsZero(Op0)) { |
49995 |     if (IsStrict) |
49996 |       return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other}, |
49997 |                          {N->getOperand(0), Op0}); |
49998 |     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0); |
49999 |   } |
50000 | |
50001 |   return SDValue(); |
50002 | } |
50003 | |
/// DAG combine for (STRICT_)SINT_TO_FP nodes: widen/narrow the integer source
/// to a width the target converts efficiently, or fold a 32-bit x87 FILD.
/// Returns SDValue() when no combine applies.
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
  // Strict nodes carry a chain in operand 0, so the integer source shifts to
  // operand 1.
  bool IsStrict = N->isStrictFPOpcode();

  // First try to optimize away the conversion when the source is a
  // vector-compare-and-mask pattern.
  if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG))
    return Res;

  SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
  EVT VT = N->getValueType(0);
  EVT InVT = Op0.getValueType();

  // Conversions to f16 vectors: widen an odd-sized integer source to the next
  // natural width (i16/i32/i64) with a sign extend, then convert from there.
  if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
    unsigned ScalarSize = InVT.getScalarSizeInBits();
    // 16/32-bit sources are already convertible as-is; >=64-bit sources are
    // not handled here.
    if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
      return SDValue();
    SDLoc dl(N);
    EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
                                 ScalarSize < 16 ? MVT::i16
                                 : ScalarSize < 32 ? MVT::i32
                                                   : MVT::i64,
                                 InVT.getVectorNumElements());
    SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                         {N->getOperand(0), P});
    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
  }

  // SINT_TO_FP(vXi8/vXi16) -> SINT_TO_FP(SEXT to vXi32): sub-32-bit integer
  // vector sources are sign extended to i32 before conversion (f16 results
  // were handled above).
  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
      VT.getScalarType() != MVT::f16) {
    SDLoc dl(N);
    EVT DstVT = InVT.changeVectorElementType(MVT::i32);
    SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                         {N->getOperand(0), P});
    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
  }

  // Without AVX512DQ there is no direct wide-integer conversion, so if the
  // source is wider than 32 bits but is provably sign-representable in 32
  // bits (enough known sign bits), truncate to i32 first.
  if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) {
    unsigned BitWidth = InVT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
    if (NumSignBits >= (BitWidth - 31)) {
      EVT TruncVT = MVT::i32;
      if (InVT.isVector())
        TruncVT = InVT.changeVectorElementType(TruncVT);
      SDLoc dl(N);
      // A plain TRUNCATE is fine unless it would produce v2i32 after
      // legalization (v2i32 is not a legal type then).
      if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
        if (IsStrict)
          return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                             {N->getOperand(0), Trunc});
        return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
      }
      // For v2i64 after legalization, truncate via a bitcast + shuffle that
      // picks the low i32 halves, and use the X86-specific CVTSI2P node.
      assert(InVT == MVT::v2i64 && "Unexpected VT!");
      SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
      SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
                                          { 0, 2, -1, -1 });
      if (IsStrict)
        return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
                           {N->getOperand(0), Shuf});
      return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
    }
  }

  // Fold an i64 load feeding the conversion into an x87 FILD on 32-bit
  // targets, where there is no 64-bit GPR to hold the integer.
  if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
      Op0.getOpcode() == ISD::LOAD) {
    LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());

    // x87 cannot produce f16/f128 results.
    if (VT == MVT::f16 || VT == MVT::f128)
      return SDValue();

    // With AVX512DQ, scalar int->fp conversions exist for all result types
    // except f80, so prefer those over FILD.
    if (Subtarget.hasDQI() && VT != MVT::f80)
      return SDValue();

    if (Ld->isSimple() && !VT.isVector() && ISD::isNormalLoad(Op0.getNode()) &&
        Op0.hasOneUse() && !Subtarget.is64Bit() && InVT == MVT::i64) {
      std::pair<SDValue, SDValue> Tmp =
          Subtarget.getTargetLowering()->BuildFILD(
              VT, InVT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(),
              Ld->getPointerInfo(), Ld->getOriginalAlign(), DAG);
      // Redirect the load's chain users to the FILD's chain so the load can
      // go away.
      DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Tmp.second);
      return Tmp.first;
    }
  }

  // The remaining combine does not preserve strict FP semantics.
  if (IsStrict)
    return SDValue();

  if (SDValue V = combineToFPTruncExtElt(N, DAG))
    return V;

  return SDValue();
}
50117 | |
50118 | static bool needCarryOrOverflowFlag(SDValue Flags) { |
50119 | assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); |
50120 | |
50121 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
50122 | UI != UE; ++UI) { |
50123 | SDNode *User = *UI; |
50124 | |
50125 | X86::CondCode CC; |
50126 | switch (User->getOpcode()) { |
50127 | default: |
50128 | |
50129 | return true; |
50130 | case X86ISD::SETCC: |
50131 | case X86ISD::SETCC_CARRY: |
50132 | CC = (X86::CondCode)User->getConstantOperandVal(0); |
50133 | break; |
50134 | case X86ISD::BRCOND: |
50135 | CC = (X86::CondCode)User->getConstantOperandVal(2); |
50136 | break; |
50137 | case X86ISD::CMOV: |
50138 | CC = (X86::CondCode)User->getConstantOperandVal(2); |
50139 | break; |
50140 | } |
50141 | |
50142 | switch (CC) { |
50143 | default: break; |
50144 | case X86::COND_A: case X86::COND_AE: |
50145 | case X86::COND_B: case X86::COND_BE: |
50146 | case X86::COND_O: case X86::COND_NO: |
50147 | case X86::COND_G: case X86::COND_GE: |
50148 | case X86::COND_L: case X86::COND_LE: |
50149 | return true; |
50150 | } |
50151 | } |
50152 | |
50153 | return false; |
50154 | } |
50155 | |
50156 | static bool onlyZeroFlagUsed(SDValue Flags) { |
50157 | assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); |
50158 | |
50159 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
50160 | UI != UE; ++UI) { |
50161 | SDNode *User = *UI; |
50162 | |
50163 | unsigned CCOpNo; |
50164 | switch (User->getOpcode()) { |
50165 | default: |
50166 | |
50167 | return false; |
50168 | case X86ISD::SETCC: CCOpNo = 0; break; |
50169 | case X86ISD::SETCC_CARRY: CCOpNo = 0; break; |
50170 | case X86ISD::BRCOND: CCOpNo = 2; break; |
50171 | case X86ISD::CMOV: CCOpNo = 2; break; |
50172 | } |
50173 | |
50174 | X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo); |
50175 | if (CC != X86::COND_E && CC != X86::COND_NE) |
50176 | return false; |
50177 | } |
50178 | |
50179 | return true; |
50180 | } |
50181 | |
/// DAG combine for X86ISD::CMP against zero: turn shift-then-test into
/// mask-then-test, widen compares of truncated values, and reuse the flag
/// output of a narrowed arithmetic op instead of a separate compare.
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
  // Only handle compares against zero; everything below relies on it.
  if (!isNullConstant(N->getOperand(1)))
    return SDValue();

  SDLoc dl(N);
  SDValue Op = N->getOperand(0);
  EVT VT = Op.getValueType();

  // (srl/shl X, C) == 0 is equivalent to (X & Mask) == 0 where Mask covers
  // the bits the shift keeps. Only valid when nothing but ZF is consumed,
  // and only profitable when the mask fits a sign-extended imm32 (TEST).
  if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
      Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
      onlyZeroFlagUsed(SDValue(N, 0))) {
    unsigned BitWidth = VT.getSizeInBits();
    const APInt &ShAmt = Op.getConstantOperandAPInt(1);
    if (ShAmt.ult(BitWidth)) { // Avoid undefined (out-of-range) shifts.
      unsigned MaskBits = BitWidth - ShAmt.getZExtValue();
      // SRL keeps the high bits; SHL keeps the low bits.
      APInt Mask = Op.getOpcode() == ISD::SRL
                       ? APInt::getHighBitsSet(BitWidth, MaskBits)
                       : APInt::getLowBitsSet(BitWidth, MaskBits);
      if (Mask.isSignedIntN(32)) {
        Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
                         DAG.getConstant(Mask, dl, VT));
        return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                           DAG.getConstant(0, dl, VT));
      }
    }
  }

  // The remaining combines all look through a truncate of the compared value.
  if (Op.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue Trunc = Op;
  Op = Op.getOperand(0);

  // If the pre-truncate value is i32 and its bits above the truncated width
  // are known zero, compare the wide value directly - the zero result is
  // identical and the truncate is no longer needed for the flags.
  EVT OpVT = Op.getValueType();
  APInt UpperBits =
      APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits());
  if (OpVT == MVT::i32 && DAG.MaskedValueIsZero(Op, UpperBits) &&
      onlyZeroFlagUsed(SDValue(N, 0))) {
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, OpVT));
  }

  // Below we rebuild the arithmetic op at the narrow type, so both the
  // truncate and the op itself must have no other users.
  if (!Trunc.hasOneUse() || !Op.hasOneUse())
    return SDValue();

  // Map the generic op to the flag-producing X86 node, rejecting cases where
  // narrowing changes the flags the users read.
  unsigned NewOpc;
  switch (Op.getOpcode()) {
  default: return SDValue();
  case ISD::AND:
    // Skip AND with a constant operand; those are handled elsewhere
    // (e.g. matched to TEST with an immediate during selection).
    if (isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    NewOpc = X86ISD::AND;
    break;
  case ISD::OR: NewOpc = X86ISD::OR; break;
  case ISD::XOR: NewOpc = X86ISD::XOR; break;
  case ISD::ADD:
    // Narrowing an ADD changes CF/OF, so bail if any user needs them.
    if (needCarryOrOverflowFlag(SDValue(N, 0)))
      return SDValue();
    NewOpc = X86ISD::ADD;
    break;
  case ISD::SUB:
    // Same CF/OF concern as ADD.
    if (needCarryOrOverflowFlag(SDValue(N, 0)))
      return SDValue();
    NewOpc = X86ISD::SUB;
    break;
  }

  // Truncate the operands and rebuild the op at the narrow width.
  SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
  SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));

  // Use a flag-producing X86 node so the flags result is available.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);

  // For AND keep an explicit compare with zero so it can later be matched
  // as a TEST of the narrowed value.
  if (NewOpc == X86ISD::AND)
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, VT));

  // Otherwise return the flag result of the narrowed op directly.
  return Op.getValue(1);
}
50282 | |
50283 | static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, |
50284 | TargetLowering::DAGCombinerInfo &DCI) { |
50285 | assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) && |
50286 | "Expected X86ISD::ADD or X86ISD::SUB"); |
50287 | |
50288 | SDLoc DL(N); |
50289 | SDValue LHS = N->getOperand(0); |
50290 | SDValue RHS = N->getOperand(1); |
50291 | MVT VT = LHS.getSimpleValueType(); |
50292 | unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB; |
50293 | |
50294 | |
50295 | if (!N->hasAnyUseOfValue(1)) { |
50296 | SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS); |
50297 | return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL); |
50298 | } |
50299 | |
50300 | |
50301 | auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) { |
50302 | SDValue Ops[] = {N0, N1}; |
50303 | SDVTList VTs = DAG.getVTList(N->getValueType(0)); |
50304 | if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) { |
50305 | SDValue Op(N, 0); |
50306 | if (Negate) |
50307 | Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); |
50308 | DCI.CombineTo(GenericAddSub, Op); |
50309 | } |
50310 | }; |
50311 | MatchGeneric(LHS, RHS, false); |
50312 | MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode()); |
50313 | |
50314 | return SDValue(); |
50315 | } |
50316 | |
50317 | static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) { |
50318 | if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { |
50319 | MVT VT = N->getSimpleValueType(0); |
50320 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
50321 | return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, |
50322 | N->getOperand(0), N->getOperand(1), |
50323 | Flags); |
50324 | } |
50325 | |
50326 | |
50327 | |
50328 | SDValue Op0 = N->getOperand(0); |
50329 | SDValue Op1 = N->getOperand(1); |
50330 | if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op1) && |
50331 | !N->hasAnyUseOfValue(1)) |
50332 | return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), Op0.getOperand(0), |
50333 | Op0.getOperand(1), N->getOperand(2)); |
50334 | |
50335 | return SDValue(); |
50336 | } |
50337 | |
50338 | |
50339 | static SDValue combineADC(SDNode *N, SelectionDAG &DAG, |
50340 | TargetLowering::DAGCombinerInfo &DCI) { |
50341 | |
50342 | |
50343 | |
50344 | if (X86::isZeroNode(N->getOperand(0)) && |
50345 | X86::isZeroNode(N->getOperand(1)) && |
50346 | |
50347 | |
50348 | SDValue(N, 1).use_empty()) { |
50349 | SDLoc DL(N); |
50350 | EVT VT = N->getValueType(0); |
50351 | SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); |
50352 | SDValue Res1 = |
50353 | DAG.getNode(ISD::AND, DL, VT, |
50354 | DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, |
50355 | DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), |
50356 | N->getOperand(2)), |
50357 | DAG.getConstant(1, DL, VT)); |
50358 | return DCI.CombineTo(N, Res1, CarryOut); |
50359 | } |
50360 | |
50361 | if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { |
50362 | MVT VT = N->getSimpleValueType(0); |
50363 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
50364 | return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, |
50365 | N->getOperand(0), N->getOperand(1), |
50366 | Flags); |
50367 | } |
50368 | |
50369 | return SDValue(); |
50370 | } |
50371 | |
50372 | |
50373 | |
50374 | |
/// Fold ADD/SUB of a boolean produced by an X86ISD::SETCC into an
/// ADC/SBB/SETCC_CARRY that consumes the flags directly, avoiding the
/// materialization of the 0/1 value.
static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
  bool IsSub = N->getOpcode() == ISD::SUB;
  SDValue X = N->getOperand(0);
  SDValue Y = N->getOperand(1);

  // ADD is commutative: canonicalize a zext operand to the RHS so the
  // peek-through below sees it.
  if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
      Y.getOpcode() != ISD::ZERO_EXTEND)
    std::swap(X, Y);

  // Look through a one-use zero extend of the boolean.
  bool PeekedThroughZext = false;
  if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
    Y = Y.getOperand(0);
    PeekedThroughZext = true;
  }

  // Likewise canonicalize a SETCC operand to the RHS for ADD - but only if
  // we did not already peek through a zext (that would change which value
  // the transforms below apply to).
  if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
      Y.getOpcode() != X86ISD::SETCC)
    std::swap(X, Y);

  if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);

  // If X is -1 or 0, we can often produce the result with a single
  // SETCC_CARRY (sbb reg,reg), avoiding the constants the general cases
  // below require.
  auto *ConstantX = dyn_cast<ConstantSDNode>(X);
  if (ConstantX) {
    // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
    //  0 - SETB  -->  0 -  (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
    if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
        (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         Y.getOperand(1));
    }

    // Same idea for the inverted conditions: commute the compare feeding the
    // flags so BE/A become AE/B, then emit the SETCC_CARRY as above.
    if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
        (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
      SDValue EFLAGS = Y->getOperand(1);
      // Only a one-use, non-constant-RHS SUB can be safely commuted here.
      if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
          EFLAGS.getValueType().isInteger() &&
          !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
        SDValue NewSub = DAG.getNode(
            X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
            EFLAGS.getOperand(1), EFLAGS.getOperand(0));
        SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
        return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                           DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                           NewEFLAGS);
      }
    }
  }

  if (CC == X86::COND_B) {
    // X + SETB Z --> adc X, 0
    // X - SETB Z --> sbb X, 0
    return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
                       DAG.getVTList(VT, MVT::i32), X,
                       DAG.getConstant(0, DL, VT), Y.getOperand(1));
  }

  if (CC == X86::COND_A) {
    SDValue EFLAGS = Y.getOperand(1);
    // SETA reads CF and ZF; by commuting the SUB that produces the flags,
    // the condition becomes a plain carry test (SETB), handled as above.
    // Only safe on a one-use SUB without a constant RHS.
    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
        EFLAGS.getValueType().isInteger() &&
        !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
      SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
                                   EFLAGS.getNode()->getVTList(),
                                   EFLAGS.getOperand(1), EFLAGS.getOperand(0));
      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
      return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
                         DAG.getVTList(VT, MVT::i32), X,
                         DAG.getConstant(0, DL, VT), NewEFLAGS);
    }
  }

  if (CC == X86::COND_AE) {
    // X + SETAE --> sbb X, -1
    // X - SETAE --> adc X, -1
    return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
                       DAG.getVTList(VT, MVT::i32), X,
                       DAG.getConstant(-1, DL, VT), Y.getOperand(1));
  }

  if (CC == X86::COND_BE) {
    // X + SETBE --> sbb X, -1
    // X - SETBE --> adc X, -1
    SDValue EFLAGS = Y.getOperand(1);
    // As with SETA: commute the flag-producing SUB so SETBE becomes SETAE,
    // which only reads CF. Same one-use / non-constant-RHS restrictions.
    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
        EFLAGS.getValueType().isInteger() &&
        !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
      SDValue NewSub = DAG.getNode(
          X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
          EFLAGS.getOperand(1), EFLAGS.getOperand(0));
      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
      return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
                         DAG.getVTList(VT, MVT::i32), X,
                         DAG.getConstant(-1, DL, VT), NewEFLAGS);
    }
  }

  // Everything below handles equality tests against zero only.
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  SDValue Cmp = Y.getOperand(1);
  if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
      !X86::isZeroNode(Cmp.getOperand(1)) ||
      !Cmp.getOperand(0).getValueType().isInteger())
    return SDValue();

  SDValue Z = Cmp.getOperand(0);
  EVT ZVT = Z.getValueType();

  // Again, a constant -1 or 0 for X enables a bare SETCC_CARRY.
  if (ConstantX) {
    // 'neg Z' sets the carry flag when Z != 0, so create 0 or -1 using
    // 'sbb' with fake operands:
    //  0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
    // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
    if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
        (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
      SDValue Zero = DAG.getConstant(0, DL, ZVT);
      SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
      SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         SDValue(Neg.getNode(), 1));
    }

    // 'cmp Z, 1' sets the carry flag when Z == 0 (unsigned borrow), so:
    //  0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
    // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
    if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
        (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
      SDValue One = DAG.getConstant(1, DL, ZVT);
      SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
      SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         Cmp1.getValue(1));
    }
  }

  // General case: (cmp Z, 1) sets the carry flag iff Z == 0.
  SDValue One = DAG.getConstant(1, DL, ZVT);
  SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
  SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);

  // ADC/SBB produce both the value and a flags result.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
  // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
  if (CC == X86::COND_NE)
    return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
                       DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));

  // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
  // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
  return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
                     DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
}
50564 | |
/// Try to match an add of two build_vectors of extracted elements of a wide
/// multiply into PMADDWD. Example of the pattern being detected:
///   t := (v8i32 mul (sext (v8i16 x0)), (sext (v8i16 x1)))
///   (add (build_vector (extract_elt t, 0), (extract_elt t, 2),
///                      (extract_elt t, 4), (extract_elt t, 6)),
///        (build_vector (extract_elt t, 1), (extract_elt t, 3),
///                      (extract_elt t, 5), (extract_elt t, 7)))
/// i.e. each result lane sums an adjacent even/odd pair of products -
/// exactly what PMADDWD computes.
static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
                            const SDLoc &DL, EVT VT,
                            const X86Subtarget &Subtarget) {
  // PMADDWD requires SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  if (Op0.getOpcode() != ISD::BUILD_VECTOR ||
      Op1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Result must be a power-of-2-length vXi32 with at least 4 lanes.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
      VT.getVectorNumElements() < 4 ||
      !isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  // Walk the lanes two at a time: lanes i and i+1 of the result must consume
  // elements 2i..2i+3 of one common multiply, with the two build_vectors
  // picking the even and odd elements respectively (in either order - the
  // index swaps below canonicalize the four indices before checking).
  SDValue Mul;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) {
    SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i),
            Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1);

    // All four contributors must be extracts with constant indices.
    if (Op0L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op0H.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1H.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *Const0L = dyn_cast<ConstantSDNode>(Op0L->getOperand(1));
    auto *Const1L = dyn_cast<ConstantSDNode>(Op1L->getOperand(1));
    auto *Const0H = dyn_cast<ConstantSDNode>(Op0H->getOperand(1));
    auto *Const1H = dyn_cast<ConstantSDNode>(Op1H->getOperand(1));
    if (!Const0L || !Const1L || !Const0H || !Const1H)
      return SDValue();
    unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(),
             Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue();

    // Canonicalize so Idx0L < Idx1L, Idx0H < Idx1H, and the L pair holds the
    // lower indices; addition is commutative, so order doesn't matter.
    if (Idx0L > Idx1L)
      std::swap(Idx0L, Idx1L);
    if (Idx0H > Idx1H)
      std::swap(Idx0H, Idx1H);
    if (Idx0L > Idx0H) {
      std::swap(Idx0L, Idx0H);
      std::swap(Idx1L, Idx1H);
    }
    // Lanes i/i+1 must sum product pairs (2i, 2i+1) and (2i+2, 2i+3).
    if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 ||
        Idx1H != 2 * i + 3)
      return SDValue();

    if (!Mul) {
      // First pair establishes the source: it must be a MUL whose vector is
      // exactly twice as long as the result (two products per result lane).
      Mul = Op0L->getOperand(0);
      if (Mul->getOpcode() != ISD::MUL ||
          Mul.getValueType().getVectorNumElements() != 2 * e)
        return SDValue();
    }

    // Every extract must come from that same multiply.
    if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) ||
        Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0))
      return SDValue();
  }

  // The multiply must be shrinkable to 16-bit signed operands; PMADDWD is a
  // signed i16 x i16 -> i32 multiply-add, so a zero-extend-only (MULU16)
  // shrink is not sufficient.
  ShrinkMode Mode;
  if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) ||
      Mode == ShrinkMode::MULU16)
    return SDValue();

  // Truncate the multiply's operands down to i16 lanes.
  EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 VT.getVectorNumElements() * 2);
  SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(0));
  SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(1));

  // Builder used by SplitOpsAndApply so wide vectors get split to legal
  // widths before emitting VPMADDWD.
  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT InVT = Ops[0].getValueType();
    assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                 InVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
  };
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { N0, N1 }, PMADDBuilder);
}
50669 | |
50670 | |
50671 | |
50672 | |
/// Second PMADDWD matcher: recognize the pattern before the wide multiply is
/// formed, i.e.
///   (add (mul (sext (build_vector even elts of A)),
///             (sext (build_vector even elts of B))),
///        (mul (sext (build_vector odd elts of A)),
///             (sext (build_vector odd elts of B))))
/// which is PMADDWD of A and B.
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
                              const SDLoc &DL, EVT VT,
                              const X86Subtarget &Subtarget) {
  // PMADDWD requires SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
    return SDValue();

  // Result must be a power-of-2-length vXi32 with at least 4 lanes.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
      VT.getVectorNumElements() < 4 ||
      !isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  SDValue N10 = N1.getOperand(0);
  SDValue N11 = N1.getOperand(1);

  // All four multiply inputs must be sign extended (PMADDWD is a signed
  // multiply-add).
  if (N00.getOpcode() != ISD::SIGN_EXTEND ||
      N01.getOpcode() != ISD::SIGN_EXTEND ||
      N10.getOpcode() != ISD::SIGN_EXTEND ||
      N11.getOpcode() != ISD::SIGN_EXTEND)
    return SDValue();

  // Peek through the sign extends.
  N00 = N00.getOperand(0);
  N01 = N01.getOperand(0);
  N10 = N10.getOperand(0);
  N11 = N11.getOperand(0);

  // The pre-extension values must all be the same vXi16 type.
  EVT InVT = N00.getValueType();
  if (InVT.getVectorElementType() != MVT::i16 || N01.getValueType() != InVT ||
      N10.getValueType() != InVT || N11.getValueType() != InVT)
    return SDValue();

  // And each must be a build_vector of extracted elements.
  if (N00.getOpcode() != ISD::BUILD_VECTOR ||
      N01.getOpcode() != ISD::BUILD_VECTOR ||
      N10.getOpcode() != ISD::BUILD_VECTOR ||
      N11.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // For each output lane i: the N0 multiply must consume element 2i of the
  // two sources (In0, In1) and the N1 multiply element 2i+1 - or the exact
  // mirror, which the index/operand swaps below canonicalize away.
  SDValue In0, In1;
  for (unsigned i = 0; i != N00.getNumOperands(); ++i) {
    SDValue N00Elt = N00.getOperand(i);
    SDValue N01Elt = N01.getOperand(i);
    SDValue N10Elt = N10.getOperand(i);
    SDValue N11Elt = N11.getOperand(i);

    // All contributors must be extracts with constant indices.
    if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
    auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
    auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
    auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
    if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
      return SDValue();
    unsigned IdxN00 = ConstN00Elt->getZExtValue();
    unsigned IdxN01 = ConstN01Elt->getZExtValue();
    unsigned IdxN10 = ConstN10Elt->getZExtValue();
    unsigned IdxN11 = ConstN11Elt->getZExtValue();

    // Add is commutative: if the even/odd roles of the two multiplies are
    // swapped, swap both index pairs to canonicalize.
    if (IdxN00 > IdxN10) {
      std::swap(IdxN00, IdxN10);
      std::swap(IdxN01, IdxN11);
    }

    // Lane i must combine source elements 2i (even) and 2i+1 (odd).
    if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
        IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
      return SDValue();

    SDValue N00In = N00Elt.getOperand(0);
    SDValue N01In = N01Elt.getOperand(0);
    SDValue N10In = N10Elt.getOperand(0);
    SDValue N11In = N11Elt.getOperand(0);

    // First lane establishes the two source vectors.
    if (!In0) {
      In0 = N00In;
      In1 = N01In;

      // The sources must be at least as wide as the output; narrower inputs
      // would need widening that this combine doesn't do.
      if (In0.getValueSizeInBits() < VT.getSizeInBits() ||
          In1.getValueSizeInBits() < VT.getSizeInBits())
        return SDValue();
    }

    // Mul is commutative: allow the two operands of each multiply to appear
    // in either order, but all lanes must use the same (In0, In1) pair.
    if (In0 != N00In)
      std::swap(N00In, N01In);
    if (In0 != N10In)
      std::swap(N10In, N11In);
    if (In0 != N00In || In1 != N01In || In0 != N10In || In1 != N11In)
      return SDValue();
  }

  // Builder used by SplitOpsAndApply so wide vectors get split to legal
  // widths before emitting VPMADDWD.
  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT OpVT = Ops[0].getValueType();
    assert(OpVT.getScalarType() == MVT::i16 &&
           "Unexpected scalar element type");
    assert(OpVT == Ops[1].getValueType() && "Operands' types mismatch");
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                 OpVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
  };

  // If a source is wider than the output requires, only its low subvector is
  // consumed by the pattern - extract it.
  EVT OutVT16 = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 VT.getVectorNumElements() * 2);
  if (OutVT16.bitsLT(In0.getValueType())) {
    In0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In0,
                      DAG.getIntPtrConstant(0, DL));
  }
  if (OutVT16.bitsLT(In1.getValueType())) {
    In1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In1,
                      DAG.getIntPtrConstant(0, DL));
  }
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
                          PMADDBuilder);
}
50808 | |
50809 | |
50810 | |
50811 | |
50812 | |
/// add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
///
/// Pushing the add into the cmov lets the adds fold into the selected
/// constants (or into LEA/addressing), at the cost of duplicating the add.
static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
  // Only handle a one-use CMOV selecting between two constants where either
  // one side is zero (the add-of-0 disappears entirely) or both constants
  // fit a sign-extended imm32 so the new adds stay immediate-encodable.
  auto isSuitableCmov = [](SDValue V) {
    if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
      return false;
    if (!isa<ConstantSDNode>(V.getOperand(0)) ||
        !isa<ConstantSDNode>(V.getOperand(1)))
      return false;
    return isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)) ||
           (V.getConstantOperandAPInt(0).isSignedIntN(32) &&
            V.getConstantOperandAPInt(1).isSignedIntN(32));
  };

  // ADD is commutative - the cmov may be either operand.
  SDValue Cmov = N->getOperand(0);
  SDValue OtherOp = N->getOperand(1);
  if (!isSuitableCmov(Cmov))
    std::swap(Cmov, OtherOp);
  if (!isSuitableCmov(Cmov))
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FalseOp = Cmov.getOperand(0);
  SDValue TrueOp = Cmov.getOperand(1);

  // Special case when this add feeds only memory addresses:
  //   add (cmov C1, C2), add(X, Y) -->
  //   add (cmov (add X, C1), (add X, C2)), Y
  // Splitting the inner add this way keeps Y available as the second term of
  // an [reg+reg] addressing mode while still folding X into the constants.
  // Restricted to non-constant X and to nodes whose every user is a memory
  // node using N as the base pointer.
  if (OtherOp.getOpcode() == ISD::ADD && OtherOp.hasOneUse() &&
      !isa<ConstantSDNode>(OtherOp.getOperand(0)) &&
      all_of(N->uses(), [&](SDNode *Use) {
        auto *MemNode = dyn_cast<MemSDNode>(Use);
        return MemNode && MemNode->getBasePtr().getNode() == N;
      })) {
    SDValue X = OtherOp.getOperand(0), Y = OtherOp.getOperand(1);
    FalseOp = DAG.getNode(ISD::ADD, DL, VT, X, FalseOp);
    TrueOp = DAG.getNode(ISD::ADD, DL, VT, X, TrueOp);
    Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp,
                       Cmov.getOperand(2), Cmov.getOperand(3));
    return DAG.getNode(ISD::ADD, DL, VT, Cmov, Y);
  }

  // General case: add OtherOp into both selected constants.
  FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
  TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
  return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
                     Cmov.getOperand(3));
}
50871 | |
50872 | static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, |
50873 | TargetLowering::DAGCombinerInfo &DCI, |
50874 | const X86Subtarget &Subtarget) { |
50875 | EVT VT = N->getValueType(0); |
50876 | SDValue Op0 = N->getOperand(0); |
50877 | SDValue Op1 = N->getOperand(1); |
50878 | |
50879 | if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG)) |
50880 | return Select; |
50881 | |
50882 | if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget)) |
50883 | return MAdd; |
50884 | if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget)) |
50885 | return MAdd; |
50886 | |
50887 | |
50888 | if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) |
50889 | return V; |
50890 | |
50891 | |
50892 | |
50893 | |
50894 | |
50895 | |
50896 | if (VT.isVector()) { |
50897 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
50898 | if (Op0.getOpcode() == ISD::ZERO_EXTEND && |
50899 | Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && |
50900 | TLI.isTypeLegal(Op0.getOperand(0).getValueType())) { |
50901 | SDLoc DL(N); |
50902 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0)); |
50903 | return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt); |
50904 | } |
50905 | |
50906 | if (Op1.getOpcode() == ISD::ZERO_EXTEND && |
50907 | Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && |
50908 | TLI.isTypeLegal(Op1.getOperand(0).getValueType())) { |
50909 | SDLoc DL(N); |
50910 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0)); |
50911 | return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt); |
50912 | } |
50913 | } |
50914 | |
50915 | return combineAddOrSubToADCOrSBB(N, DAG); |
50916 | } |
50917 | |
// Combine ISD::SUB nodes for x86.
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // Constant (scalar or splat build-vector) that isn't marked opaque, i.e.
  // one the combiner is allowed to fold with other constants.
  auto IsNonOpaqueConstant = [&](SDValue Op) {
    if (SDNode *C = DAG.isConstantIntBuildVectorOrConstantInt(Op)) {
      if (auto *Cst = dyn_cast<ConstantSDNode>(C))
        return !Cst->isOpaque();
      return true;
    }
    return false;
  };

  // Fold: sub C1, (xor X, C2) --> add (xor X, ~C2), C1+1
  // Correctness: C1 - (X ^ C2) == ~(X ^ C2) + 1 + C1 (two's complement
  // negation), and X ^ ~C2 == ~(X ^ C2). Both new constants (~C2 and C1+1)
  // fold away, and ADD enjoys better commutation/combining than SUB.
  // The one-use check avoids keeping the original XOR alive as well.
  if (Op1.getOpcode() == ISD::XOR && IsNonOpaqueConstant(Op0) &&
      IsNonOpaqueConstant(Op1.getOperand(1)) && Op1->hasOneUse()) {
    SDLoc DL(N);
    EVT VT = Op0.getValueType();
    SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0),
                                 DAG.getNOT(SDLoc(Op1), Op1.getOperand(1), VT));
    SDValue NewAdd =
        DAG.getNode(ISD::ADD, DL, VT, Op0, DAG.getConstant(1, DL, VT));
    return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd);
  }

  // Try to match (H)SUB patterns into horizontal-subtract instructions.
  if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
    return V;

  // Finally, try to synthesize an ADC/SBB from carry-flag patterns.
  return combineAddOrSubToADCOrSBB(N, DAG);
}
50955 | |
50956 | static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, |
50957 | const X86Subtarget &Subtarget) { |
50958 | MVT VT = N->getSimpleValueType(0); |
50959 | SDLoc DL(N); |
50960 | |
50961 | if (N->getOperand(0) == N->getOperand(1)) { |
50962 | if (N->getOpcode() == X86ISD::PCMPEQ) |
50963 | return DAG.getConstant(-1, DL, VT); |
50964 | if (N->getOpcode() == X86ISD::PCMPGT) |
50965 | return DAG.getConstant(0, DL, VT); |
50966 | } |
50967 | |
50968 | return SDValue(); |
50969 | } |
50970 | |
// Attempt to fold a CONCAT_VECTORS of the subvectors in Ops (all of type
// VT / Ops.size()) into a single wider node of type VT, either a broadcast,
// a wider version of the common per-subvector opcode, or a wider load.
// Returns an empty SDValue if no fold applies. Requires AVX (256-bit or,
// with AVX512, 512-bit result types).
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                                      ArrayRef<SDValue> Ops, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // concat of all-undef operands is undef.
  if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
    return DAG.getUNDEF(VT);

  // concat of all-zero operands is a wider zero vector.
  if (llvm::all_of(Ops, [](SDValue Op) {
        return ISD::isBuildVectorAllZeros(Op.getNode());
      }))
    return getZeroVector(VT, Subtarget, DAG, DL);

  SDValue Op0 = Ops[0];
  bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });

  // Repeating the same subvector in every lane: try to form a broadcast.
  if (IsSplat &&
      (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {
    // concat(broadcast(x), broadcast(x)) -> wider broadcast(x).
    if (Op0.getOpcode() == X86ISD::VBROADCAST)
      return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));

    // Splat of a (subvector) broadcast load: re-issue the memory intrinsic
    // at the wider type and rewire both the old value and its chain.
    if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
        Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
      auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
      SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops,
                                                MemIntr->getMemoryVT(),
                                                MemIntr->getMemOperand());
      // Other users of the narrow load get the low subvector of the new one.
      DAG.ReplaceAllUsesOfValueWith(
          Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
      DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
      return BcastLd;
    }

    // Splat of a plain (simple, non-temporal-free, non-extending) load:
    // turn it into a subvector broadcast load.
    if (auto *Ld = dyn_cast<LoadSDNode>(Op0)) {
      if (Ld->isSimple() && !Ld->isNonTemporal() &&
          Ld->getExtensionType() == ISD::NON_EXTLOAD) {
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
        SDValue BcastLd =
            DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops,
                                    Ld->getMemoryVT(), Ld->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(
            Op0,
            extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
        return BcastLd;
      }
    }

    // concat(movddup(x), movddup(x)) for v4f64 -> broadcast of element 0.
    if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
        (Subtarget.hasAVX2() || MayFoldLoadIntoBroadcastFromMem(
                                    Op0.getOperand(0), VT.getScalarType())))
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
                                     Op0.getOperand(0),
                                     DAG.getIntPtrConstant(0, DL)));

    // Splat of scalar_to_vector(x) -> broadcast of the scalar (AVX2, or a
    // foldable >=32-bit load on AVX1).
    if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        (Subtarget.hasAVX2() ||
         (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
        Op0.getOperand(0).getValueType() == VT.getScalarType())
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));

    // Splat of a subvector extracted from a same-width broadcast: the full
    // broadcast is already the value we want.
    if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Op0.getOperand(0).getValueType() == VT) {
      if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
          Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
        return Op0.getOperand(0);
    }
  }

  // concat(extract upper half(A), extract upper half(B)) -> vperm2x128 with
  // immediate 0x31 (selects the high 128-bit lanes of both sources).
  if (VT.is256BitVector() && Ops.size() == 2) {
    SDValue Src0 = peekThroughBitcasts(Ops[0]);
    SDValue Src1 = peekThroughBitcasts(Ops[1]);
    if (Src0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Src1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
      EVT SrcVT0 = Src0.getOperand(0).getValueType();
      EVT SrcVT1 = Src1.getOperand(0).getValueType();
      unsigned NumSrcElts0 = SrcVT0.getVectorNumElements();
      unsigned NumSrcElts1 = SrcVT1.getVectorNumElements();
      if (SrcVT0.is256BitVector() && SrcVT1.is256BitVector() &&
          Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2) &&
          Src1.getConstantOperandAPInt(1) == (NumSrcElts1 / 2)) {
        return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
                           DAG.getBitcast(VT, Src0.getOperand(0)),
                           DAG.getBitcast(VT, Src1.getOperand(0)),
                           DAG.getTargetConstant(0x31, DL, MVT::i8));
      }
    }
  }

  // If every subvector uses the same opcode, try to perform that operation
  // directly at the wider type instead.
  if (llvm::all_of(Ops, [Op0](SDValue Op) {
        return Op.getOpcode() == Op0.getOpcode();
      })) {
    // Concatenate operand I of every subvector into a single wide value.
    auto ConcatSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
      SmallVector<SDValue> Subs;
      for (SDValue SubOp : SubOps)
        Subs.push_back(SubOp.getOperand(I));
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
    };

    unsigned NumOps = Ops.size();
    switch (Op0.getOpcode()) {
    case X86ISD::SHUFP: {
      // Widen SHUFP when all subvectors share the same immediate. The f32
      // restriction keeps the per-128-bit-lane mask semantics unchanged.
      if (!IsSplat && VT.getScalarType() == MVT::f32 &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op.getOperand(2) == Op0.getOperand(2);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0),
                           ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
      }
      break;
    }
    case X86ISD::PSHUFHW:
    case X86ISD::PSHUFLW:
    case X86ISD::PSHUFD:
      // These shuffles repeat per 128-bit lane, so the same immediate is
      // valid at 256 bits given AVX2 integer support.
      if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
          Subtarget.hasInt256() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      LLVM_FALLTHROUGH;
    case X86ISD::VPERMILPI:
      // Widen to a 256-bit VPERMILPS (via v8f32 bitcasts) when the
      // immediates match.
      if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
          Subtarget.hasAVX() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
        SDValue Res = DAG.getBitcast(MVT::v8f32, ConcatSubOperand(VT, Ops, 0));
        Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
                          Op0.getOperand(1));
        return DAG.getBitcast(VT, Res);
      }
      break;
    case X86ISD::VPERMV3:
      // Merge two 256-bit VPERMV3 nodes into one 512-bit VPERMV3 by
      // concatenating the sources and rebuilding a combined index mask.
      if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
        MVT OpVT = Op0.getSimpleValueType();
        int NumSrcElts = OpVT.getVectorNumElements();
        SmallVector<int, 64> ConcatMask;
        for (unsigned i = 0; i != NumOps; ++i) {
          SmallVector<int, 64> SubMask;
          SmallVector<SDValue, 2> SubOps;
          if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
                                    SubMask))
            break;
          // Remap each index into the concatenated source space:
          // indices into source 1 shift past the widened source 0.
          for (int M : SubMask) {
            if (0 <= M) {
              M += M < NumSrcElts ? 0 : NumSrcElts;
              M += i * NumSrcElts;
            }
            ConcatMask.push_back(M);
          }
        }
        // Only fold if both shuffle masks decoded successfully.
        if (ConcatMask.size() == (NumOps * NumSrcElts)) {
          SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
                                          Ops[1].getOperand(0), DAG, DL);
          SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
                                          Ops[1].getOperand(2), DAG, DL);
          MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
          MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
          SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
          return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
        }
      }
      break;
    case X86ISD::VSHLI:
    case X86ISD::VSRLI:
      // Special case: a 64-bit shift by exactly 32 moves whole 32-bit
      // halves, so without AVX2 it can be done as a v8i32 shuffle against
      // zero (index 8 selects a zero element).
      if (VT == MVT::v4i64 && !Subtarget.hasInt256() &&
          llvm::all_of(Ops, [](SDValue Op) {
            return Op.getConstantOperandAPInt(1) == 32;
          })) {
        SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0));
        SDValue Zero = getZeroVector(MVT::v8i32, Subtarget, DAG, DL);
        if (Op0.getOpcode() == X86ISD::VSHLI) {
          Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
                                     {8, 0, 8, 2, 8, 4, 8, 6});
        } else {
          Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
                                     {1, 8, 3, 8, 5, 8, 7, 8});
        }
        return DAG.getBitcast(VT, Res);
      }
      LLVM_FALLTHROUGH;
    case X86ISD::VSRAI:
      // Widen shift-by-immediate when the subtarget has the wider shift and
      // all subvectors share the same shift amount.
      if (((VT.is256BitVector() && Subtarget.hasInt256()) ||
           (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
            (EltSizeInBits >= 32 || Subtarget.useBWIRegs()))) &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(1) == Op.getOperand(1);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      break;
    case X86ISD::VPERMI:
    case X86ISD::VROTLI:
    case X86ISD::VROTRI:
      // Widen per-lane permutes/rotates with a shared immediate to 512 bits.
      if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(1) == Op.getOperand(1);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      break;
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case X86ISD::ANDNP:
      // Widen element-wise logic ops to 512 bits (element type of the
      // operands may differ from VT's, hence the recomputed SrcVT).
      if (!IsSplat && VT.is512BitVector()) {
        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 NumOps * SrcVT.getVectorNumElements());
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(SrcVT, Ops, 0),
                           ConcatSubOperand(SrcVT, Ops, 1));
      }
      break;
    case X86ISD::HADD:
    case X86ISD::HSUB:
    case X86ISD::FHADD:
    case X86ISD::FHSUB:
    case X86ISD::PACKSS:
    case X86ISD::PACKUS:
      // Horizontal ops and packs operate per 128-bit lane, so a straight
      // widen to 256 bits preserves semantics.
      if (!IsSplat && VT.is256BitVector() &&
          (VT.isFloatingPoint() || Subtarget.hasInt256())) {
        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 NumOps * SrcVT.getVectorNumElements());
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(SrcVT, Ops, 0),
                           ConcatSubOperand(SrcVT, Ops, 1));
      }
      break;
    case X86ISD::PALIGNR:
      // PALIGNR also repeats per 128-bit lane; widen when the byte-shift
      // immediates all match.
      if (!IsSplat &&
          ((VT.is256BitVector() && Subtarget.hasInt256()) ||
           (VT.is512BitVector() && Subtarget.useBWIRegs())) &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(2) == Op.getOperand(2);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0),
                           ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
      }
      break;
    }
  }

  // Last resort: if the first operand is a load, see if the whole concat can
  // be rebuilt as one wide (fast, allowed) consecutive load.
  if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
    bool Fast;
    const X86TargetLowering *TLI = Subtarget.getTargetLowering();
    if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                                *FirstLd->getMemOperand(), &Fast) &&
        Fast) {
      if (SDValue Ld =
              EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
        return Ld;
    }
  }

  return SDValue();
}
51262 | |
51263 | static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG, |
51264 | TargetLowering::DAGCombinerInfo &DCI, |
51265 | const X86Subtarget &Subtarget) { |
51266 | EVT VT = N->getValueType(0); |
51267 | EVT SrcVT = N->getOperand(0).getValueType(); |
51268 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
51269 | |
51270 | |
51271 | if (VT.getVectorElementType() == MVT::i1) |
51272 | return SDValue(); |
51273 | |
51274 | if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) { |
51275 | SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); |
51276 | if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG, |
51277 | DCI, Subtarget)) |
51278 | return R; |
51279 | } |
51280 | |
51281 | return SDValue(); |
51282 | } |
51283 | |
// Combine ISD::INSERT_SUBVECTOR nodes after operation legalization.
static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  MVT OpVT = N->getSimpleValueType(0);

  bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1;

  SDLoc dl(N);
  SDValue Vec = N->getOperand(0);    // vector being inserted into
  SDValue SubVec = N->getOperand(1); // subvector being inserted

  uint64_t IdxVal = N->getConstantOperandVal(2);
  MVT SubVecVT = SubVec.getSimpleValueType();

  // Inserting undef into undef is still undef.
  if (Vec.isUndef() && SubVec.isUndef())
    return DAG.getUNDEF(OpVT);

  // Inserting undef/zero into undef/zero is a zero vector.
  if ((Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())) &&
      (SubVec.isUndef() || ISD::isBuildVectorAllZeros(SubVec.getNode())))
    return getZeroVector(OpVT, Subtarget, DAG, dl);

  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
    // Collapse nested insert-into-zero:
    // insert_subvector(zero, insert_subvector(zero, X, Idx2), Idx1)
    //   --> insert_subvector(zero, X, Idx1 + Idx2).
    if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
        ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
      uint64_t Idx2Val = SubVec.getConstantOperandVal(2);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                         getZeroVector(OpVT, Subtarget, DAG, dl),
                         SubVec.getOperand(1),
                         DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
    }

    // insert_subvector(zero, extract_subvector(insert_subvector(zero, X, 0),
    // 0), 0) --> insert_subvector(zero, X, 0), provided X fits in the
    // subvector width (the zero padding renders the extract redundant).
    if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
        isNullConstant(SubVec.getOperand(1)) &&
        SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
      SDValue Ins = SubVec.getOperand(0);
      if (isNullConstant(Ins.getOperand(2)) &&
          ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
          Ins.getOperand(1).getValueSizeInBits().getFixedSize() <=
              SubVecVT.getFixedSizeInBits())
        return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                           getZeroVector(OpVT, Subtarget, DAG, dl),
                           Ins.getOperand(1), N->getOperand(2));
    }
  }

  // The remaining folds do not apply to vXi1 mask vectors.
  if (IsI1Vector)
    return SDValue();

  // insert_subvector(Vec, extract_subvector(X, ExtIdx), IdxVal) where X has
  // the full result type and ExtIdx != 0: express as a two-input shuffle of
  // Vec and X (identity mask with the inserted lanes taken from X).
  if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      SubVec.getOperand(0).getSimpleValueType() == OpVT &&
      (IdxVal != 0 ||
       !(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
    int ExtIdxVal = SubVec.getConstantOperandVal(1);
    if (ExtIdxVal != 0) {
      int VecNumElts = OpVT.getVectorNumElements();
      int SubVecNumElts = SubVecVT.getVectorNumElements();
      SmallVector<int, 64> Mask(VecNumElts);

      // Start from the identity (keep Vec)...
      for (int i = 0; i != VecNumElts; ++i)
        Mask[i] = i;

      // ...then overlay the inserted lanes, indexing into the second shuffle
      // input (hence the +VecNumElts offset).
      for (int i = 0; i != SubVecNumElts; ++i)
        Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;

      return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
    }
  }

  // If this node completes a concatenation, reuse the concat folder.
  SmallVector<SDValue, 2> SubVectorOps;
  if (collectConcatOps(N, SubVectorOps)) {
    if (SDValue Fold =
            combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
      return Fold;

    // concat(X, zero) --> insert X into a zero vector at index 0 (the upper
    // half is zero either way).
    if (SubVectorOps.size() == 2 &&
        ISD::isBuildVectorAllZeros(SubVectorOps[1].getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                         getZeroVector(OpVT, Subtarget, DAG, dl),
                         SubVectorOps[0], DAG.getIntPtrConstant(0, dl));
  }

  // Inserting a broadcast into the upper part of an undef vector: a wider
  // broadcast produces the same lanes everywhere.
  if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
    return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));

  // Same for a single-use broadcast load: re-issue it at the wider type and
  // forward the chain.
  if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() &&
      SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
    SDVTList Tys = DAG.getVTList(OpVT, MVT::Other);
    SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
    SDValue BcastLd =
        DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
                                MemIntr->getMemoryVT(),
                                MemIntr->getMemOperand());
    DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
    return BcastLd;
  }

  // Inserting the load that directly follows Vec's load in memory into Vec's
  // upper half: the pair is one region repeated, i.e. a subvector broadcast
  // load of the lower half.
  if (IdxVal == (OpVT.getVectorNumElements() / 2) && SubVec.hasOneUse() &&
      Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits())) {
    auto *VecLd = dyn_cast<LoadSDNode>(Vec);
    auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
    if (VecLd && SubLd &&
        DAG.areNonVolatileConsecutiveLoads(SubLd, VecLd,
                                           SubVec.getValueSizeInBits() / 8, 0))
      return getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, dl, OpVT, SubVecVT,
                               SubLd, 0, DAG);
  }

  return SDValue();
}
51418 | |
// If we are extracting a 128-bit subvector of a VSELECT whose condition is
// itself a concatenation of subvectors, narrow the select to 128 bits so
// each half can be handled independently (the wide select would otherwise
// have to be split anyway).
static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
  SDValue Sel = peekThroughBitcasts(Ext->getOperand(0));
  SmallVector<SDValue, 4> CatOps;
  if (Sel.getOpcode() != ISD::VSELECT ||
      !collectConcatOps(Sel.getOperand(0).getNode(), CatOps))
    return SDValue();

  // Only narrow down to a 128-bit result.
  MVT VT = Ext->getSimpleValueType(0);
  if (!VT.is128BitVector())
    return SDValue();

  // The select condition must actually be wide (256/512-bit).
  MVT SelCondVT = Sel.getOperand(0).getSimpleValueType();
  if (!SelCondVT.is256BitVector() && !SelCondVT.is512BitVector())
    return SDValue();

  MVT WideVT = Ext->getOperand(0).getSimpleValueType();
  MVT SelVT = Sel.getSimpleValueType();
  assert((SelVT.is256BitVector() || SelVT.is512BitVector()) &&
         "Unexpected vector type with legal operations");

  // The extract index is expressed in WideVT elements; rescale it to SelVT
  // elements, since the select may sit behind a bitcast with a different
  // element count.
  unsigned SelElts = SelVT.getVectorNumElements();
  unsigned CastedElts = WideVT.getVectorNumElements();
  unsigned ExtIdx = Ext->getConstantOperandVal(1);
  if (SelElts % CastedElts == 0) {
    // The select has wider elements — scale the index up.
    ExtIdx *= (SelElts / CastedElts);
  } else if (CastedElts % SelElts == 0) {
    // The select has narrower elements — the index must land on a select
    // element boundary or we cannot renumber it.
    unsigned IndexDivisor = CastedElts / SelElts;
    if (ExtIdx % IndexDivisor != 0)
      return SDValue();
    ExtIdx /= IndexDivisor;
  } else {
    llvm_unreachable("Element count of simple vector types are not divisible?");
  }

  // Build the narrow select from 128-bit slices of all three operands and
  // bitcast back to the extract's result type.
  unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits();
  unsigned NarrowElts = SelElts / NarrowingFactor;
  MVT NarrowSelVT = MVT::getVectorVT(SelVT.getVectorElementType(), NarrowElts);
  SDLoc DL(Ext);
  SDValue ExtCond = extract128BitVector(Sel.getOperand(0), ExtIdx, DAG, DL);
  SDValue ExtT = extract128BitVector(Sel.getOperand(1), ExtIdx, DAG, DL);
  SDValue ExtF = extract128BitVector(Sel.getOperand(2), ExtIdx, DAG, DL);
  SDValue NarrowSel = DAG.getSelect(DL, NarrowSelVT, ExtCond, ExtT, ExtF);
  return DAG.getBitcast(VT, NarrowSel);
}
51476 | |
// Combine ISD::EXTRACT_SUBVECTOR nodes.
static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const X86Subtarget &Subtarget) {
  // Only simple (MVT-representable) result types are handled below.
  if (!N->getValueType(0).isSimple())
    return SDValue();

  MVT VT = N->getSimpleValueType(0);
  SDValue InVec = N->getOperand(0);
  unsigned IdxVal = N->getConstantOperandVal(1);
  SDValue InVecBC = peekThroughBitcasts(InVec);
  EVT InVecVT = InVec.getValueType();
  unsigned SizeInBits = VT.getSizeInBits();
  unsigned InSizeInBits = InVecVT.getSizeInBits();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // AVX1-only: if we are extracting from a 256-bit AND where one operand is a
  // bitwise-not of a concatenation, split the AND into 128-bit halves first so
  // the NOT can fold into a 128-bit ANDNP on each half.
  if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
      TLI.isTypeLegal(InVecVT) &&
      InSizeInBits == 256 && InVecBC.getOpcode() == ISD::AND) {
    auto isConcatenatedNot = [](SDValue V) {
      V = peekThroughBitcasts(V);
      if (!isBitwiseNot(V))
        return false;
      SDValue NotOp = V->getOperand(0);
      return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS;
    };
    if (isConcatenatedNot(InVecBC.getOperand(0)) ||
        isConcatenatedNot(InVecBC.getOperand(1))) {
      // Split the 256-bit AND; the extract is then reapplied to the concat.
      SDValue Concat = splitVectorIntBinary(InVecBC, DAG);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
                         DAG.getBitcast(InVecVT, Concat), N->getOperand(1));
    }
  }

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Narrow an extract of a wide VSELECT.
  if (SDValue V = narrowExtractedVectorSelect(N, DAG))
    return V;

  // Extracting from constant zero/ones vectors folds to constants.
  if (ISD::isBuildVectorAllZeros(InVec.getNode()))
    return getZeroVector(VT, Subtarget, DAG, SDLoc(N));

  if (ISD::isBuildVectorAllOnes(InVec.getNode())) {
    if (VT.getScalarType() == MVT::i1)
      return DAG.getConstant(1, SDLoc(N), VT);
    return getOnesVector(VT, DAG, SDLoc(N));
  }

  // Extracting from a BUILD_VECTOR: keep just the extracted operand slice.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getBuildVector(
        VT, SDLoc(N),
        InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements()));

  // extract_subvector(insert_subvector(zero, X, 0), 0)
  //   --> insert_subvector(zero, X, 0) at the narrower type, if X fits.
  if (VT.getVectorElementType() != MVT::i1 &&
      InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 &&
      InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) &&
      ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) &&
      InVec.getOperand(1).getValueSizeInBits() <= SizeInBits) {
    SDLoc DL(N);
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                       getZeroVector(VT, Subtarget, DAG, DL),
                       InVec.getOperand(1), InVec.getOperand(2));
  }

  // Extracting an upper subvector of a broadcast/splat is the same as
  // extracting the low subvector — all lanes are identical.
  if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
                      InVec.getOpcode() == X86ISD::VBROADCAST_LOAD ||
                      DAG.isSplatValue(InVec, false)))
    return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);

  // Same for a subvector broadcast load whose memory type matches VT: the
  // pattern repeats every VT-sized chunk.
  if (IdxVal != 0 && InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
      cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT)
    return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);

  // If the source is a shuffle and the extracted chunk maps (after scaling
  // the shuffle mask to subvector granularity) onto a whole subvector of one
  // of the shuffle inputs, extract directly from that input instead.
  if ((InSizeInBits % SizeInBits) == 0 &&
      (IdxVal % VT.getVectorNumElements()) == 0) {
    SmallVector<int, 32> ShuffleMask;
    SmallVector<int, 32> ScaledMask;
    SmallVector<SDValue, 2> ShuffleInputs;
    unsigned NumSubVecs = InSizeInBits / SizeInBits;

    if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) &&
        scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) {
      unsigned SubVecIdx = IdxVal / VT.getVectorNumElements();
      if (ScaledMask[SubVecIdx] == SM_SentinelUndef)
        return DAG.getUNDEF(VT);
      if (ScaledMask[SubVecIdx] == SM_SentinelZero)
        return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
      SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
      if (Src.getValueSizeInBits() == InSizeInBits) {
        unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs;
        unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements();
        return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG,
                                SDLoc(N), SizeInBits);
      }
    }
  }

  // Folds for extracting the low subvector of a single-use source: narrow
  // the source operation instead of performing it wide and truncating.
  unsigned InOpcode = InVec.getOpcode();
  if (IdxVal == 0 && InVec.hasOneUse()) {
    if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
      // Low half of v4i32->v4f64 conversions: use the 128-bit cvt-to-pd
      // forms, which only consume the low two source elements.
      if (InOpcode == ISD::SINT_TO_FP &&
          InVec.getOperand(0).getValueType() == MVT::v4i32) {
        return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0));
      }

      if (InOpcode == ISD::UINT_TO_FP && Subtarget.hasVLX() &&
          InVec.getOperand(0).getValueType() == MVT::v4i32) {
        return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0));
      }

      if (InOpcode == ISD::FP_EXTEND &&
          InVec.getOperand(0).getValueType() == MVT::v4f32) {
        return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0));
      }
    }
    // Low subvector of an extend: redo as a narrower *_EXTEND_VECTOR_INREG,
    // trimming the source if it is wider than the result.
    if ((InOpcode == ISD::ANY_EXTEND ||
         InOpcode == ISD::ANY_EXTEND_VECTOR_INREG ||
         InOpcode == ISD::ZERO_EXTEND ||
         InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
         InOpcode == ISD::SIGN_EXTEND ||
         InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
        (SizeInBits == 128 || SizeInBits == 256) &&
        InVec.getOperand(0).getValueSizeInBits() >= SizeInBits) {
      SDLoc DL(N);
      SDValue Ext = InVec.getOperand(0);
      if (Ext.getValueSizeInBits() > SizeInBits)
        Ext = extractSubVector(Ext, 0, DAG, DL, SizeInBits);
      unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode);
      return DAG.getNode(ExtOp, DL, VT, Ext);
    }
    // Low 128 bits of a 256-bit VSELECT: select on the low halves.
    if (InOpcode == ISD::VSELECT &&
        InVec.getOperand(0).getValueType().is256BitVector() &&
        InVec.getOperand(1).getValueType().is256BitVector() &&
        InVec.getOperand(2).getValueType().is256BitVector()) {
      SDLoc DL(N);
      SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128);
      SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128);
      SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
      return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
    }
    // Low subvector of a truncate (AVX512VL): truncate a proportionally
    // narrower slice of the source instead.
    if (InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
        (VT.is128BitVector() || VT.is256BitVector())) {
      SDLoc DL(N);
      SDValue InVecSrc = InVec.getOperand(0);
      unsigned Scale = InVecSrc.getValueSizeInBits() / InSizeInBits;
      SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
      return DAG.getNode(InOpcode, DL, VT, Ext);
    }
  }

  // A 64-bit-element shift by immediate 32 acts independently on each
  // element, so it commutes with subvector extraction: extract first, then
  // shift the narrower vector.
  if ((InOpcode == X86ISD::VSHLI || InOpcode == X86ISD::VSRLI) &&
      InVecVT.getScalarSizeInBits() == 64 &&
      InVec.getConstantOperandAPInt(1) == 32) {
    SDLoc DL(N);
    SDValue Ext =
        extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
    return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1));
  }

  return SDValue();
}
51662 | |
// Combine ISD::SCALAR_TO_VECTOR nodes.
static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  SDLoc DL(N);

  // v1i1 scalar_to_vector(and X, 1) --> v1i1 scalar_to_vector(X).
  // Only bit 0 of the scalar survives into a v1i1, so the masking AND is
  // redundant. The one-use check avoids duplicating the AND's other uses.
  if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
    if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
      if (C->getAPIntValue().isOneValue())
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
                           Src.getOperand(0));

  // v1i1 scalar_to_vector(extract_vector_elt(vXi1 V, 0))
  //   --> extract_subvector(V, 0): stay in the mask-vector domain instead of
  // bouncing through a scalar.
  if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
      Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
    if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
      if (C->isNullValue())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
                           Src.getOperand(1));

  // v2i64/v2f64 scalar_to_vector of an i64 that is really a (<=32-bit) value
  // any-extended to 64 bits: build a v4i32 scalar_to_vector of the 32-bit
  // value and bitcast. The upper bits are undefined either way (ANY_EXTEND,
  // or the extended bits of an EXTLOAD), so no information is lost.
  if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    // Returns the pre-extension value (or the extload itself) when Op is a
    // single-use i64 whose meaningful bits fit in 32, else an empty SDValue.
    auto IsAnyExt64 = [](SDValue Op) {
      if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
        return SDValue();
      if (Op.getOpcode() == ISD::ANY_EXTEND &&
          Op.getOperand(0).getScalarValueSizeInBits() <= 32)
        return Op.getOperand(0);
      if (auto *Ld = dyn_cast<LoadSDNode>(Op))
        if (Ld->getExtensionType() == ISD::EXTLOAD &&
            Ld->getMemoryVT().getScalarSizeInBits() <= 32)
          return Op;
      return SDValue();
    };
    if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
      return DAG.getBitcast(
          VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
                          DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
  }

  // v2i64 scalar_to_vector(bitcast(x86mmx)) --> MOVQ2DQ (MMX -> XMM move).
  if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST &&
      Src.getOperand(0).getValueType() == MVT::x86mmx)
    return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, Src.getOperand(0));

  // If an equal- or wider-sized VBROADCAST of the same scalar already exists,
  // reuse it (element 0 of a broadcast is the scalar itself); extract the low
  // subvector when the broadcast is wider.
  if (VT.getScalarType() == Src.getValueType())
    for (SDNode *User : Src->uses())
      if (User->getOpcode() == X86ISD::VBROADCAST &&
          Src == User->getOperand(0)) {
        unsigned SizeInBits = VT.getFixedSizeInBits();
        unsigned BroadcastSizeInBits =
            User->getValueSizeInBits(0).getFixedSize();
        if (BroadcastSizeInBits == SizeInBits)
          return SDValue(User, 0);
        if (BroadcastSizeInBits > SizeInBits)
          return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
        // Narrower broadcasts are not reused here.
      }

  return SDValue();
}
51732 | |
// Combine X86ISD::PMULDQ / PMULUDQ nodes (64-bit widening multiplies that
// only read the low 32 bits of each 64-bit element).
static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Canonicalize a constant operand to the RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
    return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);

  // Multiply by zero (only RHS needs checking after canonicalization).
  if (ISD::isBuildVectorAllZeros(RHS.getNode()))
    return DAG.getConstant(0, SDLoc(N), N->getValueType(0));

  // Let SimplifyDemandedBits prune the operands; PMULDQ/PMULUDQ ignore the
  // upper 32 bits of each input element, which this exploits internally.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
    return SDValue(N, 0);

  // If an operand is a single-use sign/zero extend-in-reg from v4i32, replace
  // it with a shuffle that just places the two source elements in the even
  // 32-bit lanes ({0, undef, 1, undef}): since only the low 32 bits of each
  // 64-bit element are read, the extension bits are don't-cares and the
  // shuffle is cheaper than a real extend.
  if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() &&
      (LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
       LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
      LHS.getOperand(0).getValueType() == MVT::v4i32) {
    SDLoc dl(N);
    LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0),
                               LHS.getOperand(0), { 0, -1, 1, -1 });
    LHS = DAG.getBitcast(MVT::v2i64, LHS);
    return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
  }
  // Same fold for the RHS.
  if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() &&
      (RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
       RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
      RHS.getOperand(0).getValueType() == MVT::v4i32) {
    SDLoc dl(N);
    RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0),
                               RHS.getOperand(0), { 0, -1, 1, -1 });
    RHS = DAG.getBitcast(MVT::v2i64, RHS);
    return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
  }

  return SDValue();
}
51785 | |
51786 | |
51787 | static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG, |
51788 | TargetLowering::DAGCombinerInfo &DCI) { |
51789 | SDValue LHS = N->getOperand(0); |
51790 | SDValue RHS = N->getOperand(1); |
51791 | |
51792 | |
51793 | |
51794 | if (ISD::isBuildVectorAllZeros(LHS.getNode()) || |
51795 | ISD::isBuildVectorAllZeros(RHS.getNode())) |
51796 | return DAG.getConstant(0, SDLoc(N), N->getValueType(0)); |
51797 | |
51798 | return SDValue(); |
51799 | } |
51800 | |
// Combine ISD::ANY/SIGN/ZERO_EXTEND_VECTOR_INREG nodes.
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
                                          TargetLowering::DAGCombinerInfo &DCI,
                                          const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue In = N->getOperand(0);
  unsigned Opcode = N->getOpcode();
  unsigned InOpcode = In.getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Fold an extend of a simple, single-use normal load into an extending
  // load when that extload is legal (ANY_EXTEND uses a ZEXTLOAD).
  if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
      In.hasOneUse()) {
    auto *Ld = cast<LoadSDNode>(In);
    if (Ld->isSimple()) {
      MVT SVT = In.getSimpleValueType().getVectorElementType();
      ISD::LoadExtType Ext = Opcode == ISD::SIGN_EXTEND_VECTOR_INREG
                                 ? ISD::SEXTLOAD
                                 : ISD::ZEXTLOAD;
      EVT MemVT = VT.changeVectorElementType(SVT);
      if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
        SDValue Load =
            DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
                           Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
                           Ld->getMemOperand()->getFlags());
        // Re-point all chain users of the original load at the new load.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
        return Load;
      }
    }
  }

  // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(x)) when both extends have
  // the same opcode: extend x directly to the outer type.
  if (Opcode == InOpcode)
    return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));

  // Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(x),0)) back to
  // EXTEND_VECTOR_INREG(x), provided the extract keeps exactly the width of
  // the original source x.
  if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 &&
      In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
      In.getOperand(0).getOperand(0).getValueSizeInBits() ==
          In.getValueSizeInBits())
    return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));

  // Attempt to merge the node into a target shuffle chain. Only tried for
  // ANY_EXTEND, or for ZERO_EXTEND when SSE4.1 is available.
  if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
      (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && Subtarget.hasSSE41())) {
    SDValue Op(N, 0);
    if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
      if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
        return Res;
  }

  return SDValue();
}
51856 | |
51857 | static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, |
51858 | TargetLowering::DAGCombinerInfo &DCI) { |
51859 | EVT VT = N->getValueType(0); |
51860 | |
51861 | if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) |
51862 | return DAG.getConstant(0, SDLoc(N), VT); |
51863 | |
51864 | APInt KnownUndef, KnownZero; |
51865 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
51866 | APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
51867 | if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, |
51868 | KnownZero, DCI)) |
51869 | return SDValue(N, 0); |
51870 | |
51871 | return SDValue(); |
51872 | } |
51873 | |
51874 | |
51875 | |
51876 | |
51877 | static SDValue combineFP16_TO_FP(SDNode *N, SelectionDAG &DAG, |
51878 | const X86Subtarget &Subtarget) { |
51879 | if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) |
51880 | return SDValue(); |
51881 | |
51882 | if (N->getOperand(0).getOpcode() != ISD::FP_TO_FP16) |
51883 | return SDValue(); |
51884 | |
51885 | if (N->getValueType(0) != MVT::f32 || |
51886 | N->getOperand(0).getOperand(0).getValueType() != MVT::f32) |
51887 | return SDValue(); |
51888 | |
51889 | SDLoc dl(N); |
51890 | SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, |
51891 | N->getOperand(0).getOperand(0)); |
51892 | Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res, |
51893 | DAG.getTargetConstant(4, dl, MVT::i32)); |
51894 | Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res); |
51895 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, |
51896 | DAG.getIntPtrConstant(0, dl)); |
51897 | } |
51898 | |
// Custom-combine a vector f16 -> f32/f64 FP_EXTEND (including the strict FP
// variant) via X86ISD::CVTPH2PS on targets that have F16C but lack full FP16
// arithmetic support.
static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
    return SDValue();

  // Targets with native FP16 support are handled elsewhere; bail out.
  if (Subtarget.hasFP16())
    return SDValue();

  bool IsStrict = N->isStrictFPOpcode();
  EVT VT = N->getValueType(0);
  // Operand 0 is the chain for strict nodes; the value is operand 1.
  SDValue Src = N->getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();

  if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::f16)
    return SDValue();

  if (VT.getVectorElementType() != MVT::f32 &&
      VT.getVectorElementType() != MVT::f64)
    return SDValue();

  // Only handle power-of-2 vectors with at least 2 elements.
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts == 1 || !isPowerOf2_32(NumElts))
    return SDValue();

  SDLoc dl(N);

  // CVTPH2PS consumes the half values as an integer vector of bit patterns.
  EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
  Src = DAG.getBitcast(IntVT, Src);

  // Widen the source to v8i16: 4-element sources are padded with undef,
  // smaller ones with zeroes.
  if (NumElts < 8) {
    unsigned NumConcats = 8 / NumElts;
    SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT)
                                : DAG.getConstant(0, dl, IntVT);
    SmallVector<SDValue, 4> Ops(NumConcats, Fill);
    Ops[0] = Src;
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, Ops);
  }

  // Convert to f32 first, using at least a v4f32 result type.
  EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32,
                               std::max(4U, NumElts));
  SDValue Cvt, Chain;
  if (IsStrict) {
    Cvt = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {CvtVT, MVT::Other},
                      {N->getOperand(0), Src});
    Chain = Cvt.getValue(1);
  } else {
    Cvt = DAG.getNode(X86ISD::CVTPH2PS, dl, CvtVT, Src);
  }

  // Trim the widened conversion back down for 2-element results.
  if (NumElts < 4) {
    assert(NumElts == 2 && "Unexpected size");
    Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Cvt,
                      DAG.getIntPtrConstant(0, dl));
  }

  if (IsStrict) {
    // Extend to the final result type on the strict chain if needed, and
    // return both the value and the chain.
    if (Cvt.getValueType() != VT) {
      Cvt = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other},
                        {Chain, Cvt});
      Chain = Cvt.getValue(1);
    }
    return DAG.getMergeValues({Cvt, Chain}, dl);
  }

  // Non-strict: extend to the final result type.
  return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);
}
51970 | |
51971 | |
51972 | |
51973 | |
// Try to reuse a wider broadcast-load of the same memory: if another
// broadcast of the same opcode loads the same pointer/chain into a wider
// vector, extract this node's value from that result instead of keeping a
// separate load.
static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD ||
          N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) &&
         "Unknown broadcast load type");

  // Only combine when this node's chain result is unused.
  if (N->hasAnyUseOfValue(1))
    return SDValue();

  auto *MemIntrin = cast<MemIntrinsicSDNode>(N);

  SDValue Ptr = MemIntrin->getBasePtr();
  SDValue Chain = MemIntrin->getChain();
  EVT VT = N->getSimpleValueType(0);
  EVT MemVT = MemIntrin->getMemoryVT();

  // Scan other users of the base pointer for a matching broadcast: same
  // opcode, pointer, chain and memory width, an unused chain result, and a
  // strictly wider value result than ours.
  for (SDNode *User : Ptr->uses())
    if (User != N && User->getOpcode() == N->getOpcode() &&
        cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
        cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
        cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
            MemVT.getSizeInBits() &&
        !User->hasAnyUseOfValue(1) &&
        User->getValueSizeInBits(0).getFixedSize() > VT.getFixedSizeInBits()) {
      // Extract our width from the low part of the wider broadcast.
      SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
                                         VT.getSizeInBits());
      Extract = DAG.getBitcast(VT, Extract);
      // Replace both our value result and our (dead) chain result.
      return DCI.CombineTo(N, Extract, SDValue(User, 1));
    }

  return SDValue();
}
52009 | |
// Custom-combine a vector f32 -> f16 FP_ROUND via X86ISD::CVTPS2PH on
// targets that have F16C but lack full FP16 arithmetic support.
static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
    return SDValue();

  // Targets with native FP16 support are handled elsewhere; bail out.
  if (Subtarget.hasFP16())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
      SrcVT.getVectorElementType() != MVT::f32)
    return SDValue();

  // Only handle power-of-2 vectors with at least 2 elements.
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts == 1 || !isPowerOf2_32(NumElts))
    return SDValue();

  SDLoc dl(N);

  // Widen a 2-element source to v4f32 by padding with zeroes.
  if (NumElts < 4)
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                      DAG.getConstantFP(0.0, dl, SrcVT));

  // Convert to a vector of i16 half bit patterns, at least v8i16 wide. The
  // last operand is the CVTPS2PH rounding-control immediate.
  EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                               std::max(8U, NumElts));
  SDValue Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src,
                            DAG.getTargetConstant(4, dl, MVT::i32));

  // Extract the demanded low elements if the result was widened.
  if (NumElts < 8) {
    EVT IntVT = VT.changeVectorElementTypeToInteger();
    Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, IntVT, Cvt,
                      DAG.getIntPtrConstant(0, dl));
  }

  // Reinterpret the integer halves as f16 elements.
  return DAG.getBitcast(VT, Cvt);
}
52052 | |
52053 | static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) { |
52054 | SDValue Src = N->getOperand(0); |
52055 | |
52056 | |
52057 | if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) { |
52058 | LoadSDNode *LN = cast<LoadSDNode>(Src.getNode()); |
52059 | |
52060 | if (LN->isSimple()) { |
52061 | SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(), |
52062 | LN->getBasePtr(), |
52063 | LN->getPointerInfo(), |
52064 | LN->getOriginalAlign(), |
52065 | LN->getMemOperand()->getFlags()); |
52066 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), NewLd.getValue(1)); |
52067 | return NewLd; |
52068 | } |
52069 | } |
52070 | |
52071 | return SDValue(); |
52072 | } |
52073 | |
52074 | static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG, |
52075 | TargetLowering::DAGCombinerInfo &DCI) { |
52076 | unsigned NumBits = N->getSimpleValueType(0).getSizeInBits(); |
52077 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
52078 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), |
52079 | APInt::getAllOnesValue(NumBits), DCI)) |
52080 | return SDValue(N, 0); |
52081 | |
52082 | return SDValue(); |
52083 | } |
52084 | |
// Central DAG-combine dispatcher for the X86 backend: routes each node
// opcode to its dedicated combine helper. Returns SDValue() when no combine
// applies.
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::SCALAR_TO_VECTOR:
    return combineScalarToVector(N, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case X86ISD::PEXTRW:
  case X86ISD::PEXTRB:
    return combineExtractVectorElt(N, DAG, DCI, Subtarget);
  case ISD::CONCAT_VECTORS:
    return combineConcatVectors(N, DAG, DCI, Subtarget);
  case ISD::INSERT_SUBVECTOR:
    return combineInsertSubvector(N, DAG, DCI, Subtarget);
  case ISD::EXTRACT_SUBVECTOR:
    return combineExtractSubvector(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
  case ISD::SELECT:
  case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
  case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
  case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
  case X86ISD::CMP: return combineCMP(N, DAG);
  case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
  case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
  case X86ISD::ADD:
  case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
  case X86ISD::SBB: return combineSBB(N, DAG);
  case X86ISD::ADC: return combineADC(N, DAG, DCI);
  case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
  case ISD::SHL: return combineShiftLeft(N, DAG);
  case ISD::SRA: return combineShiftRightArithmetic(N, DAG, Subtarget);
  case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI, Subtarget);
  case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
  case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
  case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
  case X86ISD::BEXTR:
  case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
  case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
  case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
  case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
  case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
  case X86ISD::VEXTRACT_STORE:
    return combineVEXTRACT_STORE(N, DAG, DCI, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
    return combineSIntToFP(N, DAG, DCI, Subtarget);
  case ISD::UINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP:
    return combineUIntToFP(N, DAG, Subtarget);
  case ISD::FADD:
  case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
  case X86ISD::VFCMULC:
  case X86ISD::VFMULC: return combineFMulcFCMulc(N, DAG, Subtarget);
  case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
  case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
  case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI);
  case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
  case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
  case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
  case X86ISD::FXOR:
  case X86ISD::FOR: return combineFOr(N, DAG, DCI, Subtarget);
  case X86ISD::FMIN:
  case X86ISD::FMAX: return combineFMinFMax(N, DAG);
  case ISD::FMINNUM:
  case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
  case X86ISD::CVTSI2P:
  case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
  case X86ISD::CVTP2SI:
  case X86ISD::CVTP2UI:
  case X86ISD::STRICT_CVTTP2SI:
  case X86ISD::CVTTP2SI:
  case X86ISD::STRICT_CVTTP2UI:
  case X86ISD::CVTTP2UI:
    return combineCVTP2I_CVTTP2I(N, DAG, DCI);
  case X86ISD::STRICT_CVTPH2PS:
  case X86ISD::CVTPH2PS: return combineCVTPH2PS(N, DAG, DCI);
  case X86ISD::BT: return combineBT(N, DAG, DCI);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
  case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
  case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return combineEXTEND_VECTOR_INREG(N, DAG, DCI, Subtarget);
  case ISD::SETCC: return combineSetCC(N, DAG, DCI, Subtarget);
  case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
  case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
  case X86ISD::PACKSS:
  case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
  case X86ISD::HADD:
  case X86ISD::HSUB:
  case X86ISD::FHADD:
  case X86ISD::FHSUB: return combineVectorHADDSUB(N, DAG, DCI, Subtarget);
  case X86ISD::VSHL:
  case X86ISD::VSRA:
  case X86ISD::VSRL:
    return combineVectorShiftVar(N, DAG, DCI, Subtarget);
  case X86ISD::VSHLI:
  case X86ISD::VSRAI:
  case X86ISD::VSRLI:
    return combineVectorShiftImm(N, DAG, DCI, Subtarget);
  case ISD::INSERT_VECTOR_ELT:
  case X86ISD::PINSRB:
  case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
  // All target shuffle nodes funnel into the generic shuffle combiner.
  case X86ISD::SHUFP:
  case X86ISD::INSERTPS:
  case X86ISD::EXTRQI:
  case X86ISD::INSERTQI:
  case X86ISD::VALIGN:
  case X86ISD::PALIGNR:
  case X86ISD::VSHLDQ:
  case X86ISD::VSRLDQ:
  case X86ISD::BLENDI:
  case X86ISD::UNPCKH:
  case X86ISD::UNPCKL:
  case X86ISD::MOVHLPS:
  case X86ISD::MOVLHPS:
  case X86ISD::PSHUFB:
  case X86ISD::PSHUFD:
  case X86ISD::PSHUFHW:
  case X86ISD::PSHUFLW:
  case X86ISD::MOVSHDUP:
  case X86ISD::MOVSLDUP:
  case X86ISD::MOVDDUP:
  case X86ISD::MOVSS:
  case X86ISD::MOVSD:
  case X86ISD::MOVSH:
  case X86ISD::VBROADCAST:
  case X86ISD::VPPERM:
  case X86ISD::VPERMI:
  case X86ISD::VPERMV:
  case X86ISD::VPERMV3:
  case X86ISD::VPERMIL2:
  case X86ISD::VPERMILPI:
  case X86ISD::VPERMILPV:
  case X86ISD::VPERM2X128:
  case X86ISD::SHUF128:
  case X86ISD::VZEXT_MOVL:
  case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
  // All FMA variants share one combiner.
  case X86ISD::FMADD_RND:
  case X86ISD::FMSUB:
  case X86ISD::STRICT_FMSUB:
  case X86ISD::FMSUB_RND:
  case X86ISD::FNMADD:
  case X86ISD::STRICT_FNMADD:
  case X86ISD::FNMADD_RND:
  case X86ISD::FNMSUB:
  case X86ISD::STRICT_FNMSUB:
  case X86ISD::FNMSUB_RND:
  case ISD::FMA:
  case ISD::STRICT_FMA: return combineFMA(N, DAG, DCI, Subtarget);
  case X86ISD::FMADDSUB_RND:
  case X86ISD::FMSUBADD_RND:
  case X86ISD::FMADDSUB:
  case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI);
  case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
  case X86ISD::MGATHER:
  case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI);
  case ISD::MGATHER:
  case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI);
  case X86ISD::PCMPEQ:
  case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
  case X86ISD::PMULDQ:
  case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
  case X86ISD::VPMADDUBSW:
  case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
  case X86ISD::KSHIFTL:
  case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
  case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
  case ISD::STRICT_FP_EXTEND:
  case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget);
  case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget);
  case X86ISD::VBROADCAST_LOAD:
  case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI);
  case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG);
  case X86ISD::PDEP: return combinePDEP(N, DAG, DCI);
  }

  // No combine matched.
  return SDValue();
}
52267 | |
// Return true if operating on VT with opcode Opc is desirable; returning
// false tells the combiner to try promoting the operation to a wider type
// (see IsDesirableToPromoteOp below).
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
  if (!isTypeLegal(VT))
    return false;

  // Byte-element vector shifts are not desirable.
  // NOTE(review): presumably because there is no native vXi8 shift — confirm.
  if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
    return false;

  // Scalar i8 multiplies and shifts are not desirable; they get promoted.
  if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
    return false;

  // Most i16 operations are reported undesirable so they get promoted to
  // i32 — presumably to avoid 16-bit operand-size prefixed instructions.
  if (VT == MVT::i16) {
    switch (Opc) {
    default:
      break;
    case ISD::LOAD:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::SUB:
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      return false;
    }
  }

  // Everything else is fine at its current type.
  return true;
}
52313 | |
52314 | SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl, |
52315 | SDValue Value, SDValue Addr, |
52316 | SelectionDAG &DAG) const { |
52317 | const Module *M = DAG.getMachineFunction().getMMI().getModule(); |
52318 | Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); |
52319 | if (IsCFProtectionSupported) { |
52320 | |
52321 | |
52322 | |
52323 | |
52324 | return DAG.getNode(X86ISD::NT_BRIND, dl, MVT::Other, Value, Addr); |
52325 | } |
52326 | |
52327 | return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG); |
52328 | } |
52329 | |
// Decide whether Op (an i16 operation, or an i8 multiply by constant)
// should be promoted; on success PVT is set to i32. Promotion is declined
// when it would break a foldable load or load-op-store (RMW) pattern.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
  EVT VT = Op.getValueType();
  // i8 multiplies by a constant are also candidates for promotion.
  bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
                             isa<ConstantSDNode>(Op.getOperand(1));

  // Only i16 ops and the i8 mul-by-constant special case are considered.
  if (VT != MVT::i16 && !Is8BitMulByConstant)
    return false;

  // True if Op's only use is a normal store back to the same address the
  // given load reads from, i.e. a foldable load-op-store sequence.
  auto IsFoldableRMW = [](SDValue Load, SDValue Op) {
    if (!Op.hasOneUse())
      return false;
    SDNode *User = *Op->use_begin();
    if (!ISD::isNormalStore(User))
      return false;
    auto *Ld = cast<LoadSDNode>(Load);
    auto *St = cast<StoreSDNode>(User);
    return Ld->getBasePtr() == St->getBasePtr();
  };

  // Same test for an atomic-load / atomic-store pair.
  auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
    if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
      return false;
    if (!Op.hasOneUse())
      return false;
    SDNode *User = *Op->use_begin();
    if (User->getOpcode() != ISD::ATOMIC_STORE)
      return false;
    auto *Ld = cast<AtomicSDNode>(Load);
    auto *St = cast<AtomicSDNode>(User);
    return Ld->getBasePtr() == St->getBasePtr();
  };

  bool Commute = false;
  switch (Op.getOpcode()) {
  default: return false;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL: {
    SDValue N0 = Op.getOperand(0);
    // Don't promote when the shifted value is part of a foldable RMW.
    if (MayFoldLoad(N0) && IsFoldableRMW(N0, Op))
      return false;
    break;
  }
  case ISD::ADD:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    // These are commutative; either operand may hold the foldable load.
    Commute = true;
    LLVM_FALLTHROUGH;
  case ISD::SUB: {
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // Decline promotion when a foldable load (or atomic RMW pair) would be
    // disturbed by widening the operation.
    if (MayFoldLoad(N1) &&
        (!Commute || !isa<ConstantSDNode>(N0) ||
         (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
      return false;
    if (MayFoldLoad(N0) &&
        ((Commute && !isa<ConstantSDNode>(N1)) ||
         (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
      return false;
    if (IsFoldableAtomicRMW(N0, Op) ||
        (Commute && IsFoldableAtomicRMW(N1, Op)))
      return false;
  }
  }

  // Promote to i32.
  PVT = MVT::i32;
  return true;
}
52410 | |
52411 | |
52412 | |
52413 | |
52414 | |
52415 | |
52416 | static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) { |
52417 | S = S.substr(S.find_first_not_of(" \t")); |
52418 | |
52419 | for (StringRef Piece : Pieces) { |
52420 | if (!S.startswith(Piece)) |
52421 | return false; |
52422 | |
52423 | S = S.substr(Piece.size()); |
52424 | StringRef::size_type Pos = S.find_first_not_of(" \t"); |
52425 | if (Pos == 0) |
52426 | return false; |
52427 | |
52428 | S = S.substr(Pos); |
52429 | } |
52430 | |
52431 | return S.empty(); |
52432 | } |
52433 | |
52434 | static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) { |
52435 | |
52436 | if (AsmPieces.size() == 3 || AsmPieces.size() == 4) { |
52437 | if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") && |
52438 | std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") && |
52439 | std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) { |
52440 | |
52441 | if (AsmPieces.size() == 3) |
52442 | return true; |
52443 | else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}")) |
52444 | return true; |
52445 | } |
52446 | } |
52447 | return false; |
52448 | } |
52449 | |
// Recognize inline-asm byte-swapping idioms ("bswap $0", "rorw/rolw $8",
// the classic three-instruction 32/64-bit sequences) and replace the call
// with the llvm.bswap intrinsic via IntrinsicLowering::LowerToByteSwap.
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
  InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());

  const std::string &AsmStr = IA->getAsmString();

  // Only integer results whose width is a multiple of 16 bits can be
  // byte swaps.
  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
  if (!Ty || Ty->getBitWidth() % 16 != 0)
    return false;

  // Split the asm string into individual instructions.
  SmallVector<StringRef, 4> AsmPieces;
  SplitString(AsmStr, AsmPieces, ";\n");

  switch (AsmPieces.size()) {
  default: return false;
  case 1:
    // A single "bswap[lq] $0" (with or without the :q modifier) is a bswap;
    // no constraint checking needed.
    if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
        matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
        matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
        matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
        matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
        matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
      return IntrinsicLowering::LowerToByteSwap(CI);
    }

    // "rorw $8, ${0:w}" or "rolw $8, ${0:w}" on an i16 is a 16-bit byte
    // swap, provided only the flag registers are clobbered.
    if (CI->getType()->isIntegerTy(16) &&
        IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
        (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
         matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
      AsmPieces.clear();
      StringRef ConstraintsStr = IA->getConstraintString();
      SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
      if (clobbersFlagRegisters(AsmPieces))
        return IntrinsicLowering::LowerToByteSwap(CI);
    }
    break;
  case 3:
    // "rorw $8; rorl $16; rorw $8" on an i32 is a 32-bit byte swap, again
    // provided only the flag registers are clobbered.
    if (CI->getType()->isIntegerTy(32) &&
        IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
        matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
        matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
        matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
      AsmPieces.clear();
      StringRef ConstraintsStr = IA->getConstraintString();
      SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
      if (clobbersFlagRegisters(AsmPieces))
        return IntrinsicLowering::LowerToByteSwap(CI);
    }

    // "bswap %eax; bswap %edx; xchgl %eax, %edx" with an "A" (edx:eax)
    // constraint is a 64-bit byte swap.
    if (CI->getType()->isIntegerTy(64)) {
      InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
      if (Constraints.size() >= 2 &&
          Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
          Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
        if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
            matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
            matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
          return IntrinsicLowering::LowerToByteSwap(CI);
      }
    }
    break;
  }
  return false;
}
52525 | |
// Map an inline-asm flag-output constraint string such as "{@ccz}" to the
// corresponding X86 condition code, or X86::COND_INVALID if unrecognized.
// Aliases (e.g. "z"/"e", "c"/"b", the "n*" negated forms) map to the same
// underlying condition codes.
static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) {
  X86::CondCode Cond = StringSwitch<X86::CondCode>(Constraint)
                           .Case("{@cca}", X86::COND_A)
                           .Case("{@ccae}", X86::COND_AE)
                           .Case("{@ccb}", X86::COND_B)
                           .Case("{@ccbe}", X86::COND_BE)
                           .Case("{@ccc}", X86::COND_B)
                           .Case("{@cce}", X86::COND_E)
                           .Case("{@ccz}", X86::COND_E)
                           .Case("{@ccg}", X86::COND_G)
                           .Case("{@ccge}", X86::COND_GE)
                           .Case("{@ccl}", X86::COND_L)
                           .Case("{@ccle}", X86::COND_LE)
                           .Case("{@ccna}", X86::COND_BE)
                           .Case("{@ccnae}", X86::COND_B)
                           .Case("{@ccnb}", X86::COND_AE)
                           .Case("{@ccnbe}", X86::COND_A)
                           .Case("{@ccnc}", X86::COND_AE)
                           .Case("{@ccne}", X86::COND_NE)
                           .Case("{@ccnz}", X86::COND_NE)
                           .Case("{@ccng}", X86::COND_LE)
                           .Case("{@ccnge}", X86::COND_L)
                           .Case("{@ccnl}", X86::COND_GE)
                           .Case("{@ccnle}", X86::COND_G)
                           .Case("{@ccno}", X86::COND_NO)
                           .Case("{@ccnp}", X86::COND_NP)
                           .Case("{@ccns}", X86::COND_NS)
                           .Case("{@cco}", X86::COND_O)
                           .Case("{@ccp}", X86::COND_P)
                           .Case("{@ccs}", X86::COND_S)
                           .Default(X86::COND_INVALID);
  return Cond;
}
52559 | |
52560 | |
// Classify an X86 inline-asm constraint as a register class, a specific
// register, an immediate, or "other"; unknown constraints fall through to
// the generic TargetLowering implementation.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    // Register-class constraints (GPR subsets, x87, MMX, vector, mask regs).
    case 'R':
    case 'q':
    case 'Q':
    case 'f':
    case 't':
    case 'u':
    case 'y':
    case 'x':
    case 'v':
    case 'l':
    case 'k':
      return C_RegisterClass;
    // Specific-register constraints (a/b/c/d, S, D, the A pair).
    case 'a':
    case 'b':
    case 'c':
    case 'd':
    case 'S':
    case 'D':
    case 'A':
      return C_Register;
    // Immediate constraints with various range restrictions.
    case 'I':
    case 'J':
    case 'K':
    case 'N':
    case 'G':
    case 'L':
    case 'M':
      return C_Immediate;
    case 'C':
    case 'e':
    case 'Z':
      return C_Other;
    default:
      break;
    }
  }
  // Two-letter 'Y?' constraints.
  else if (Constraint.size() == 2) {
    switch (Constraint[0]) {
    default:
      break;
    case 'Y':
      switch (Constraint[1]) {
      default:
        break;
      case 'z':
        return C_Register;
      case 'i':
      case 'm':
      case 'k':
      case 't':
      case '2':
        return C_RegisterClass;
      }
    }
  } else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
    // "{@cc*}" flag-output constraints.
    return C_Other;
  return TargetLowering::getConstraintType(Constraint);
}
52623 | |
52624 | |
52625 | |
52626 | |
/// Examine the constraint letter and the type of the operand it applies to,
/// and compute a weight describing how well the operand matches the
/// constraint.  Higher weights make the constraint alternative more likely
/// to be chosen.
TargetLowering::ConstraintWeight
X86TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;

  // If we don't have a value we can't do a match, but allow it at the
  // lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint letter.
  switch (*constraint) {
  default:
    // Score unknown letters with the generic implementation first; the
    // integer check below may still upgrade the weight.  The fallthrough
    // into the register-letter group is deliberate.
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    LLVM_FALLTHROUGH;
  case 'R':
  case 'q':
  case 'Q':
  case 'a':
  case 'b':
  case 'c':
  case 'd':
  case 'S':
  case 'D':
  case 'A':
    // GPR constraints: only integer operands fit.
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_SpecificReg;
    break;
  case 'f':
  case 't':
  case 'u':
    // x87 stack registers hold floating-point values only.
    if (type->isFloatingPointTy())
      weight = CW_SpecificReg;
    break;
  case 'y':
    // MMX register.
    if (type->isX86_MMXTy() && Subtarget.hasMMX())
      weight = CW_SpecificReg;
    break;
  case 'Y':
    // Two-character 'Y<x>' constraints only.
    if (StringRef(constraint).size() != 2)
      break;
    switch (constraint[1]) {
    default:
      return CW_Invalid;
    // XMM0: vector widths gated on the matching ISA level.
    case 'z':
      if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
          ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
          ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
        return CW_SpecificReg;
      return CW_Invalid;
    // Opmask (%k) registers, AVX-512 only.
    case 'k':
      if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
        return CW_Register;
      return CW_Invalid;
    // Any MMX register.
    case 'm':
      if (type->isX86_MMXTy() && Subtarget.hasMMX())
        return weight;
      return CW_Invalid;
    // SSE register variants require at least SSE2.
    case 'i':
    case 't':
    case '2':
      if (!Subtarget.hasSSE2())
        return CW_Invalid;
      break;
    }
    break;
  case 'v':
    // 'v' additionally admits 512-bit values under AVX-512, then shares the
    // 128/256-bit handling with 'x' via fallthrough.
    if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
      weight = CW_Register;
    LLVM_FALLTHROUGH;
  case 'x':
    if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
        ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()))
      weight = CW_Register;
    break;
  case 'k':
    // Opmask registers for conditional vector operations (AVX-512).
    if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
      weight = CW_Register;
    break;
  case 'I':
    // Immediate in [0, 31].  (info.CallOperandVal is the same value as the
    // local CallOperandVal used by the other cases.)
    if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
      if (C->getZExtValue() <= 31)
        weight = CW_Constant;
    }
    break;
  case 'J':
    // Immediate in [0, 63].
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 63)
        weight = CW_Constant;
    }
    break;
  case 'K':
    // Signed 8-bit immediate.
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
        weight = CW_Constant;
    }
    break;
  case 'L':
    // Exactly 0xff or 0xffff.
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
        weight = CW_Constant;
    }
    break;
  case 'M':
    // Immediate in [0, 3].
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 3)
        weight = CW_Constant;
    }
    break;
  case 'N':
    // Unsigned 8-bit immediate.
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 0xff)
        weight = CW_Constant;
    }
    break;
  case 'G':
  case 'C':
    // Floating-point constants.
    if (isa<ConstantFP>(CallOperandVal)) {
      weight = CW_Constant;
    }
    break;
  case 'e':
    // Signed 32-bit immediate.
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -0x80000000LL) &&
          (C->getSExtValue() <= 0x7fffffffLL))
        weight = CW_Constant;
    }
    break;
  case 'Z':
    // Unsigned 32-bit immediate.
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 0xffffffff)
        weight = CW_Constant;
    }
    break;
  }
  return weight;
}
52769 | |
52770 | |
52771 | |
52772 | |
52773 | const char *X86TargetLowering:: |
52774 | LowerXConstraint(EVT ConstraintVT) const { |
52775 | |
52776 | |
52777 | if (ConstraintVT.isFloatingPoint()) { |
52778 | if (Subtarget.hasSSE1()) |
52779 | return "x"; |
52780 | } |
52781 | |
52782 | return TargetLowering::LowerXConstraint(ConstraintVT); |
52783 | } |
52784 | |
52785 | |
/// Lower a "{@cc<cond>}" flag-output operand: read EFLAGS, materialize the
/// requested condition as a SETCC, and zero-extend it to the operand's type.
/// Returns a null SDValue if the constraint is not a flag-output constraint.
SDValue X86TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
  if (Cond == X86::COND_INVALID)
    return SDValue();

  // Flag outputs must be scalar integers of at least 8 bits.
  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
      OpInfo.ConstraintVT.getSizeInBits() < 8)
    report_fatal_error("Flag output operand is of invalid type");

  // Get the EFLAGS register.  Only thread the incoming glue (and update the
  // chain) when a glue value is present.
  if (Flag.getNode()) {
    Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Flag);
    Chain = Flag.getValue(1);
  } else
    Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32);

  // Extract the condition into an i8 via SETcc.
  SDValue CC = getSETCC(Cond, Flag, DL, DAG);

  // Widen to the operand's declared type.
  SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);

  return Result;
}
52810 | |
52811 | |
52812 | |
/// Lower the specified operand into the Ops vector.  If the operand does not
/// satisfy the constraint, nothing is added and the generic handler decides
/// what to do.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only single-letter constraints are handled here.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'I':
    // Immediate in [0, 31].
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 31) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'J':
    // Immediate in [0, 63].
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 63) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'K':
    // Signed 8-bit immediate.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (isInt<8>(C->getSExtValue())) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'L':
    // 0xff, 0xffff, or (on 64-bit targets) 0xffffffff.  Note the constant is
    // emitted sign-extended (getSExtValue), matching the original accepted
    // masks when truncated to the operand type.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
          (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
        Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'M':
    // Immediate in [0, 3].
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 3) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    // Unsigned 8-bit immediate.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 255) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'O':
    // Immediate in [0, 127].
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 127) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'e': {
    // 32-bit signed value, emitted as an i64 target constant.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
                                           C->getSExtValue())) {
        Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
        break;
      }
    }
    return;
  }
  case 'Z': {
    // 32-bit unsigned value.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
                                           C->getZExtValue())) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  }
  case 'i': {
    // Literal immediates are always ok.  i1 (boolean) constants are extended
    // per the target's boolean contents; everything else is sign-extended.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
      BooleanContent BCont = getBooleanContents(MVT::i64);
      ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                    : ISD::SIGN_EXTEND;
      int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
                                                  : CST->getSExtValue();
      Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
      break;
    }

    // Under GOT/stub PIC styles, addresses cannot be used as immediates
    // (they must be computed at runtime), so bail out.
    if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
      return;

    // Globals accessed through a stub also cannot be folded as immediates.
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
      if (isGlobalStubReference(
              Subtarget.classifyGlobalReference(GA->getGlobal())))
        return;
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
52954 | |
52955 | |
52956 | |
52957 | static bool isGRClass(const TargetRegisterClass &RC) { |
52958 | return RC.hasSuperClassEq(&X86::GR8RegClass) || |
52959 | RC.hasSuperClassEq(&X86::GR16RegClass) || |
52960 | RC.hasSuperClassEq(&X86::GR32RegClass) || |
52961 | RC.hasSuperClassEq(&X86::GR64RegClass) || |
52962 | RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass); |
52963 | } |
52964 | |
52965 | |
52966 | |
52967 | static bool isFRClass(const TargetRegisterClass &RC) { |
52968 | return RC.hasSuperClassEq(&X86::FR16XRegClass) || |
52969 | RC.hasSuperClassEq(&X86::FR32XRegClass) || |
52970 | RC.hasSuperClassEq(&X86::FR64XRegClass) || |
52971 | RC.hasSuperClassEq(&X86::VR128XRegClass) || |
52972 | RC.hasSuperClassEq(&X86::VR256XRegClass) || |
52973 | RC.hasSuperClassEq(&X86::VR512RegClass); |
52974 | } |
52975 | |
52976 | |
52977 | |
52978 | static bool isVKClass(const TargetRegisterClass &RC) { |
52979 | return RC.hasSuperClassEq(&X86::VK1RegClass) || |
52980 | RC.hasSuperClassEq(&X86::VK2RegClass) || |
52981 | RC.hasSuperClassEq(&X86::VK4RegClass) || |
52982 | RC.hasSuperClassEq(&X86::VK8RegClass) || |
52983 | RC.hasSuperClassEq(&X86::VK16RegClass) || |
52984 | RC.hasSuperClassEq(&X86::VK32RegClass) || |
52985 | RC.hasSuperClassEq(&X86::VK64RegClass); |
52986 | } |
52987 | |
/// Resolve an inline-asm register constraint to a (physical register,
/// register class) pair.  A register of 0 with a non-null class means "any
/// register in the class"; (0, nullptr) means the constraint cannot be
/// satisfied for the requested type.
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  // First, handle single-letter GCC constraint letters that map directly to
  // an LLVM register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    // 'A' means [ER]AX + [ER]DX.
    case 'A':
      if (Subtarget.is64Bit())
        return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
      assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
             "Expecting 64, 32 or 16 bit subtarget");
      return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);

    // AVX-512 opmask registers; i32/i64 masks additionally require BWI.
    case 'k':
      if (Subtarget.hasAVX512()) {
        if (VT == MVT::i1)
          return std::make_pair(0U, &X86::VK1RegClass);
        if (VT == MVT::i8)
          return std::make_pair(0U, &X86::VK8RegClass);
        if (VT == MVT::i16)
          return std::make_pair(0U, &X86::VK16RegClass);
      }
      if (Subtarget.hasBWI()) {
        if (VT == MVT::i32)
          return std::make_pair(0U, &X86::VK32RegClass);
        if (VT == MVT::i64)
          return std::make_pair(0U, &X86::VK64RegClass);
      }
      break;
    case 'q':
      // In 64-bit mode, any GPR is byte-addressable, so 'q' behaves like
      // 'r'; in 32-bit mode it degrades to the ABCD classes via 'Q'.
      if (Subtarget.is64Bit()) {
        if (VT == MVT::i8 || VT == MVT::i1)
          return std::make_pair(0U, &X86::GR8RegClass);
        if (VT == MVT::i16)
          return std::make_pair(0U, &X86::GR16RegClass);
        if (VT == MVT::i32 || VT == MVT::f32)
          return std::make_pair(0U, &X86::GR32RegClass);
        if (VT != MVT::f80 && !VT.isVector())
          return std::make_pair(0U, &X86::GR64RegClass);
        break;
      }
      LLVM_FALLTHROUGH;
    // 32-bit fallthrough from 'q': a/b/c/d registers only.
    case 'Q':
      if (VT == MVT::i8 || VT == MVT::i1)
        return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, &X86::GR16_ABCDRegClass);
      if (VT == MVT::i32 || VT == MVT::f32 ||
          (!VT.isVector() && !Subtarget.is64Bit()))
        return std::make_pair(0U, &X86::GR32_ABCDRegClass);
      if (VT != MVT::f80 && !VT.isVector())
        return std::make_pair(0U, &X86::GR64_ABCDRegClass);
      break;
    case 'r':   // Any general register.
    case 'l':   // "Index register" — same classes on x86.
      if (VT == MVT::i8 || VT == MVT::i1)
        return std::make_pair(0U, &X86::GR8RegClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, &X86::GR16RegClass);
      if (VT == MVT::i32 || VT == MVT::f32 ||
          (!VT.isVector() && !Subtarget.is64Bit()))
        return std::make_pair(0U, &X86::GR32RegClass);
      if (VT != MVT::f80 && !VT.isVector())
        return std::make_pair(0U, &X86::GR64RegClass);
      break;
    case 'R':   // "Legacy" (non-REX) registers.
      if (VT == MVT::i8 || VT == MVT::i1)
        return std::make_pair(0U, &X86::GR8_NOREXRegClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, &X86::GR16_NOREXRegClass);
      if (VT == MVT::i32 || VT == MVT::f32 ||
          (!VT.isVector() && !Subtarget.is64Bit()))
        return std::make_pair(0U, &X86::GR32_NOREXRegClass);
      if (VT != MVT::f80 && !VT.isVector())
        return std::make_pair(0U, &X86::GR64_NOREXRegClass);
      break;
    case 'f':   // x87 FP stack registers.
      // Use RFP32/RFP64 only when the value is not being kept in SSE regs;
      // otherwise fall back to the full 80-bit class.
      if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, &X86::RFP32RegClass);
      if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, &X86::RFP64RegClass);
      if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)
        return std::make_pair(0U, &X86::RFP80RegClass);
      break;
    case 'y':   // MMX registers.
      if (!Subtarget.hasMMX()) break;
      return std::make_pair(0U, &X86::VR64RegClass);
    case 'v':   // Any EVEX-encodable SSE/AVX register (xmm/ymm/zmm 0-31).
    case 'x':   // SSE/AVX registers xmm/ymm 0-15.
      if (!Subtarget.hasSSE1()) break;
      bool VConstraint = (Constraint[0] == 'v');

      switch (VT.SimpleTy) {
      default: break;
      // Scalar SSE types.
      case MVT::f16:
        if (VConstraint && Subtarget.hasFP16())
          return std::make_pair(0U, &X86::FR16XRegClass);
        break;
      case MVT::f32:
      case MVT::i32:
        if (VConstraint && Subtarget.hasVLX())
          return std::make_pair(0U, &X86::FR32XRegClass);
        return std::make_pair(0U, &X86::FR32RegClass);
      case MVT::f64:
      case MVT::i64:
        if (VConstraint && Subtarget.hasVLX())
          return std::make_pair(0U, &X86::FR64XRegClass);
        return std::make_pair(0U, &X86::FR64RegClass);
      case MVT::i128:
        if (Subtarget.is64Bit()) {
          if (VConstraint && Subtarget.hasVLX())
            return std::make_pair(0U, &X86::VR128XRegClass);
          return std::make_pair(0U, &X86::VR128RegClass);
        }
        break;
      // 128-bit vector types (v8f16 additionally needs FP16).
      case MVT::v8f16:
        if (!Subtarget.hasFP16())
          break;
        LLVM_FALLTHROUGH;
      case MVT::f128:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        if (VConstraint && Subtarget.hasVLX())
          return std::make_pair(0U, &X86::VR128XRegClass);
        return std::make_pair(0U, &X86::VR128RegClass);
      // 256-bit AVX types.
      case MVT::v16f16:
        if (!Subtarget.hasFP16())
          break;
        LLVM_FALLTHROUGH;
      case MVT::v32i8:
      case MVT::v16i16:
      case MVT::v8i32:
      case MVT::v4i64:
      case MVT::v8f32:
      case MVT::v4f64:
        if (VConstraint && Subtarget.hasVLX())
          return std::make_pair(0U, &X86::VR256XRegClass);
        if (Subtarget.hasAVX())
          return std::make_pair(0U, &X86::VR256RegClass);
        break;
      // 512-bit AVX-512 types.
      case MVT::v32f16:
        if (!Subtarget.hasFP16())
          break;
        LLVM_FALLTHROUGH;
      case MVT::v64i8:
      case MVT::v32i16:
      case MVT::v8f64:
      case MVT::v16f32:
      case MVT::v16i32:
      case MVT::v8i64:
        if (!Subtarget.hasAVX512()) break;
        if (VConstraint)
          return std::make_pair(0U, &X86::VR512RegClass);
        // 'x' is limited to registers 0-15.
        return std::make_pair(0U, &X86::VR512_0_15RegClass);
      }
      break;
    }
  } else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
    switch (Constraint[1]) {
    default:
      break;
    case 'i':
    case 't':
    case '2':
      // These behave like the plain 'x' constraint.
      return getRegForInlineAsmConstraint(TRI, "x", VT);
    case 'm':
      // Any MMX register.
      if (!Subtarget.hasMMX()) break;
      return std::make_pair(0U, &X86::VR64RegClass);
    case 'z':
      // XMM0/YMM0/ZMM0 specifically, sized by VT.
      if (!Subtarget.hasSSE1()) break;
      switch (VT.SimpleTy) {
      default: break;
      // Scalar SSE types.
      case MVT::f16:
        if (!Subtarget.hasFP16())
          break;
        return std::make_pair(X86::XMM0, &X86::FR16XRegClass);
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(X86::XMM0, &X86::FR32RegClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(X86::XMM0, &X86::FR64RegClass);
      case MVT::v8f16:
        if (!Subtarget.hasFP16())
          break;
        LLVM_FALLTHROUGH;
      case MVT::f128:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(X86::XMM0, &X86::VR128RegClass);
      // 256-bit types map to YMM0.
      case MVT::v16f16:
        if (!Subtarget.hasFP16())
          break;
        LLVM_FALLTHROUGH;
      case MVT::v32i8:
      case MVT::v16i16:
      case MVT::v8i32:
      case MVT::v4i64:
      case MVT::v8f32:
      case MVT::v4f64:
        if (Subtarget.hasAVX())
          return std::make_pair(X86::YMM0, &X86::VR256RegClass);
        break;
      case MVT::v32f16:
        if (!Subtarget.hasFP16())
          break;
        LLVM_FALLTHROUGH;
      case MVT::v64i8:
      case MVT::v32i16:
      case MVT::v8f64:
      case MVT::v16f32:
      case MVT::v16i32:
      case MVT::v8i64:
        if (Subtarget.hasAVX512())
          return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
        break;
      }
      break;
    case 'k':
      // Write-maskable opmask registers (exclude %k0, which can't mask).
      if (Subtarget.hasAVX512()) {
        if (VT == MVT::i1)
          return std::make_pair(0U, &X86::VK1WMRegClass);
        if (VT == MVT::i8)
          return std::make_pair(0U, &X86::VK8WMRegClass);
        if (VT == MVT::i16)
          return std::make_pair(0U, &X86::VK16WMRegClass);
      }
      if (Subtarget.hasBWI()) {
        if (VT == MVT::i32)
          return std::make_pair(0U, &X86::VK32WMRegClass);
        if (VT == MVT::i64)
          return std::make_pair(0U, &X86::VK64WMRegClass);
      }
      break;
    }
  }

  // "{@cc<cond>}" flag outputs are produced in a GR32.
  if (parseConstraintCode(Constraint) != X86::COND_INVALID)
    return std::make_pair(0U, &X86::GR32RegClass);

  // Use the default implementation in TargetLowering to convert the
  // constraint into a member of a register class.
  std::pair<Register, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // Not found as a standard register?
  if (!Res.second) {
    // Only match x87 registers when the VT is one x87 can hold.
    if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
      // Map st(0) -> st(7) -> ST0 -> ST7 (case-insensitive "{st(N)}").
      if (Constraint.size() == 7 && Constraint[0] == '{' &&
          tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
          Constraint[3] == '(' &&
          (Constraint[4] >= '0' && Constraint[4] <= '7') &&
          Constraint[5] == ')' && Constraint[6] == '}') {
        // st(7) is special: it lives in its own one-register class.
        if (Constraint[4] == '7')
          return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
        return std::make_pair(X86::FP0 + Constraint[4] - '0',
                              &X86::RFP80RegClass);
      }

      // "{st}" is shorthand for st(0).
      if (StringRef("{st}").equals_insensitive(Constraint))
        return std::make_pair(X86::FP0, &X86::RFP80RegClass);
    }

    // flags -> EFLAGS.
    if (StringRef("{flags}").equals_insensitive(Constraint))
      return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);

    // dirflag -> DF (the direction flag), only for clobbers (VT == Other).
    if (StringRef("{dirflag}").equals_insensitive(Constraint) &&
        VT == MVT::Other)
      return std::make_pair(X86::DF, &X86::DFCCRRegClass);

    // fpsr -> FPSW (the x87 status word).
    if (StringRef("{fpsr}").equals_insensitive(Constraint))
      return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);

    return Res;
  }

  // In 32-bit mode, the extended (encoding >= 8) GPR/FP registers do not
  // exist; reject them.
  if (!Subtarget.is64Bit() &&
      (isFRClass(*Res.second) || isGRClass(*Res.second)) &&
      TRI->getEncodingValue(Res.first) >= 8) {
    return std::make_pair(0, nullptr);
  }

  // XMM/YMM registers 16-31 (encoding bit 0x10) require AVX-512.
  if (!Subtarget.hasAVX512() && isFRClass(*Res.second) &&
      TRI->getEncodingValue(Res.first) & 0x10) {
    return std::make_pair(0, nullptr);
  }

  // If the requested type is legal for the class we found, we're done.
  if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
    return Res;

  // Otherwise the constraint matched a class whose registers don't carry VT
  // directly (e.g. "{ax}" with i32, or "{dx}" with f64).  Remap to the
  // sub/super register or sibling class of the right size.
  const TargetRegisterClass *Class = Res.second;

  // GPRs: switch to the same register at the requested width.
  if (isGRClass(*Class)) {
    unsigned Size = VT.getSizeInBits();
    if (Size == 1) Size = 8;
    Register DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
    if (DestReg > 0) {
      bool is64Bit = Subtarget.is64Bit();
      const TargetRegisterClass *RC =
          Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
        : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
        : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
        : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr)
        : nullptr;
      if (Size == 64 && !is64Bit) {
        // A 64-bit value in 32-bit mode is modeled as a register pair;
        // return the low register in a class that pins its pair partner.
        switch (DestReg) {
        case X86::RAX:
          return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
        case X86::RDX:
          return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
        case X86::RCX:
          return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
        case X86::RBX:
          return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
        case X86::RSI:
          return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
        case X86::RDI:
          return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
        case X86::RBP:
          return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
        default:
          return std::make_pair(0, nullptr);
        }
      }
      if (RC && RC->contains(DestReg))
        return std::make_pair(DestReg, RC);
      return Res;
    }
    // No sub/super register of the right size exists.
    return std::make_pair(0, nullptr);
  } else if (isFRClass(*Class)) {
    // Vector registers: keep the register index, but pick the class whose
    // spill size matches VT (e.g. "{xmm0}" with v8i32 -> YMM0).
    if (VT == MVT::f16)
      Res.second = &X86::FR16XRegClass;
    else if (VT == MVT::f32 || VT == MVT::i32)
      Res.second = &X86::FR32XRegClass;
    else if (VT == MVT::f64 || VT == MVT::i64)
      Res.second = &X86::FR64XRegClass;
    else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))
      Res.second = &X86::VR128XRegClass;
    else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))
      Res.second = &X86::VR256XRegClass;
    else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
      Res.second = &X86::VR512RegClass;
    else {
      // No matching vector class for this type.
      Res.first = 0;
      Res.second = nullptr;
    }
  } else if (isVKClass(*Class)) {
    // Opmask registers: pick the mask class matching the integer width.
    if (VT == MVT::i1)
      Res.second = &X86::VK1RegClass;
    else if (VT == MVT::i8)
      Res.second = &X86::VK8RegClass;
    else if (VT == MVT::i16)
      Res.second = &X86::VK16RegClass;
    else if (VT == MVT::i32)
      Res.second = &X86::VK32RegClass;
    else if (VT == MVT::i64)
      Res.second = &X86::VK64RegClass;
    else {
      // No matching mask class for this type.
      Res.first = 0;
      Res.second = nullptr;
    }
  }

  return Res;
}
53413 | |
53414 | InstructionCost X86TargetLowering::getScalingFactorCost(const DataLayout &DL, |
53415 | const AddrMode &AM, |
53416 | Type *Ty, |
53417 | unsigned AS) const { |
53418 | |
53419 | |
53420 | |
53421 | |
53422 | |
53423 | |
53424 | |
53425 | |
53426 | |
53427 | |
53428 | |
53429 | |
53430 | |
53431 | |
53432 | |
53433 | |
53434 | |
53435 | |
53436 | if (isLegalAddressingMode(DL, AM, Ty, AS)) |
53437 | |
53438 | |
53439 | return AM.Scale != 0; |
53440 | return -1; |
53441 | } |
53442 | |
53443 | bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { |
53444 | |
53445 | |
53446 | |
53447 | |
53448 | |
53449 | |
53450 | |
53451 | bool OptSize = Attr.hasFnAttr(Attribute::MinSize); |
53452 | return OptSize && !VT.isVector(); |
53453 | } |
53454 | |
53455 | void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { |
53456 | if (!Subtarget.is64Bit()) |
53457 | return; |
53458 | |
53459 | |
53460 | X86MachineFunctionInfo *AFI = |
53461 | Entry->getParent()->getInfo<X86MachineFunctionInfo>(); |
53462 | AFI->setIsSplitCSR(true); |
53463 | } |
53464 | |
/// For split-CSR functions, copy each callee-saved register into a fresh
/// virtual register at function entry and copy it back before every exit's
/// terminator, instead of using normal prologue/epilogue save/restore.
void X86TargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  // IStart is a null-terminated list of CSRs handled via copies.
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (X86::GR64RegClass.contains(*I))
      RC = &X86::GR64RegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register NewVR = MRI->createVirtualRegister(RC);
    // Create a copy of the CSR into a virtual register at entry.
    // NOTE: this does not emit CFI pseudo-instructions; that is only valid
    // because the function is required (asserted below) to be nounwind.
    assert(
        Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
        "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Copy the virtual register back into the CSR right before each exit's
    // terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
53503 | |
/// Swift error handling (the swifterror register) is only supported on
/// 64-bit x86 targets.
bool X86TargetLowering::supportSwiftError() const {
  return Subtarget.is64Bit();
}
53507 | |
53508 | |
/// Returns true if stack probing is done through a function call (i.e. a
/// probe symbol name exists for this function).
bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const {
  return !getStackProbeSymbolName(MF).empty();
}
53512 | |
53513 | |
53514 | bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const { |
53515 | |
53516 | |
53517 | if (Subtarget.isOSWindows() || |
53518 | MF.getFunction().hasFnAttribute("no-stack-arg-probe")) |
53519 | return false; |
53520 | |
53521 | |
53522 | if (MF.getFunction().hasFnAttribute("probe-stack")) |
53523 | return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == |
53524 | "inline-asm"; |
53525 | |
53526 | return false; |
53527 | } |
53528 | |
53529 | |
53530 | |
53531 | StringRef |
53532 | X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { |
53533 | |
53534 | if (hasInlineStackProbe(MF)) |
53535 | return ""; |
53536 | |
53537 | |
53538 | if (MF.getFunction().hasFnAttribute("probe-stack")) |
53539 | return MF.getFunction().getFnAttribute("probe-stack").getValueAsString(); |
53540 | |
53541 | |
53542 | |
53543 | if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO() || |
53544 | MF.getFunction().hasFnAttribute("no-stack-arg-probe")) |
53545 | return ""; |
53546 | |
53547 | |
53548 | |
53549 | if (Subtarget.is64Bit()) |
53550 | return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; |
53551 | return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk"; |
53552 | } |
53553 | |
53554 | unsigned |
53555 | X86TargetLowering::getStackProbeSize(MachineFunction &MF) const { |
53556 | |
53557 | |
53558 | unsigned StackProbeSize = 4096; |
53559 | const Function &Fn = MF.getFunction(); |
53560 | if (Fn.hasFnAttribute("stack-probe-size")) |
53561 | Fn.getFnAttribute("stack-probe-size") |
53562 | .getValueAsString() |
53563 | .getAsInteger(0, StackProbeSize); |
53564 | return StackProbeSize; |
53565 | } |
53566 | |
53567 | Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { |
53568 | if (ML->isInnermost() && |
53569 | ExperimentalPrefInnermostLoopAlignment.getNumOccurrences()) |
53570 | return Align(1ULL << ExperimentalPrefInnermostLoopAlignment); |
53571 | return TargetLowering::getPrefLoopAlignment(); |
53572 | } |