SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
128 }
129
130 if (Subtarget.hasVector())
131 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
132 }
133
134 // Compute derived properties from the register classes
135 computeRegisterProperties(Subtarget.getRegisterInfo());
136
137 // Set up special registers.
138 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
139
140 // TODO: It may be better to default to latency-oriented scheduling, however
141 // LLVM's current latency-oriented scheduler can't handle physreg definitions
142 // such as SystemZ has with CC, so set this to the register-pressure
143 // scheduler, because it can.
145
148
150
151 // Instructions are strings of 2-byte aligned 2-byte values.
153 // For performance reasons we prefer 16-byte alignment.
155
156 // Handle operations that are handled in a similar way for all types.
157 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
158 I <= MVT::LAST_FP_VALUETYPE;
159 ++I) {
161 if (isTypeLegal(VT)) {
162 // Lower SET_CC into an IPM-based sequence.
166
167 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
169
170 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
172 setOperationAction(ISD::BR_CC, VT, Custom);
173 }
174 }
175
176 // Expand jump table branches as address arithmetic followed by an
177 // indirect jump.
178 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
179
180 // Expand BRCOND into a BR_CC (see above).
181 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
182
183 // Handle integer types except i128.
184 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
185 I <= MVT::LAST_INTEGER_VALUETYPE;
186 ++I) {
188 if (isTypeLegal(VT) && VT != MVT::i128) {
190
191 // Expand individual DIV and REMs into DIVREMs.
198
199 // Support addition/subtraction with overflow.
202
203 // Support addition/subtraction with carry.
206
207 // Support carry in as value rather than glue.
210
211 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
212 // available, or if the operand is constant.
213 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
214
215 // Use POPCNT on z196 and above.
216 if (Subtarget.hasPopulationCount())
218 else
220
221 // No special instructions for these.
224
225 // Use *MUL_LOHI where possible instead of MULH*.
230
231 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
232 // unsigned on z10 (only z196 and above have native support for
233 // unsigned conversions).
240 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
241 auto OpAction =
242 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
243 setOperationAction(Op, VT, OpAction);
244 }
245 }
246 }
247
248 // Handle i128 if legal.
249 if (isTypeLegal(MVT::i128)) {
250 // No special instructions for these.
257
258 // We may be able to use VSLDB/VSLD/VSRD for these.
261
262 // No special instructions for these before z17.
263 if (!Subtarget.hasVectorEnhancements3()) {
273 } else {
274 // Even if we do have a legal 128-bit multiply, we do not
275 // want 64-bit multiply-high operations to use it.
278 }
279
280 // Support addition/subtraction with carry.
285
286 // Use VPOPCT and add up partial results.
288
289 // Additional instructions available with z17.
290 if (Subtarget.hasVectorEnhancements3()) {
291 setOperationAction(ISD::ABS, MVT::i128, Legal);
292
294 MVT::i128, Legal);
295 }
296 }
297
298 // These need custom handling in order to handle the f16 conversions.
307
308 // Type legalization will convert 8- and 16-bit atomic operations into
309 // forms that operate on i32s (but still keeping the original memory VT).
310 // Lower them into full i32 operations.
311 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
312 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
313 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
314 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
315 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
316 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
317 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
318 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
319 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
320 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
321 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
322
323 // Whether or not i128 is a legal type, we need to custom lower
324 // the atomic operations in order to exploit SystemZ instructions.
325 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
326 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
327 setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
328 setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);
329
330 // Mark sign/zero extending atomic loads as legal, which will make
331 // DAGCombiner fold extensions into atomic loads if possible.
333 {MVT::i8, MVT::i16, MVT::i32}, Legal);
335 {MVT::i8, MVT::i16}, Legal);
337 MVT::i8, Legal);
338
339 // We can use the CC result of compare-and-swap to implement
340 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
341 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
342 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
343 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
344
345 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
346
347 // Traps are legal, as we will convert them to "j .+2".
348 setOperationAction(ISD::TRAP, MVT::Other, Legal);
349
350 // We have native support for a 64-bit CTLZ, via FLOGR.
354
355 // On z17 we have native support for a 64-bit CTTZ.
356 if (Subtarget.hasMiscellaneousExtensions4()) {
360 }
361
362 // On z15 we have native support for a 64-bit CTPOP.
363 if (Subtarget.hasMiscellaneousExtensions3()) {
366 }
367
368 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
370
371 // Expand 128 bit shifts without using a libcall.
375
376 // Also expand 256 bit shifts if i128 is a legal type.
377 if (isTypeLegal(MVT::i128)) {
381 }
382
383 // Handle bitcast from fp128 to i128.
384 if (!isTypeLegal(MVT::i128))
385 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
386
387 // We have native instructions for i8, i16 and i32 extensions, but not i1.
389 for (MVT VT : MVT::integer_valuetypes()) {
393 }
394
395 // Handle the various types of symbolic address.
401
402 // We need to handle dynamic allocations specially because of the
403 // 160-byte area at the bottom of the stack.
404 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
405 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
406
407 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
408 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
409
410 // Handle prefetches with PFD or PFDRL.
411 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
412
413 // Handle readcyclecounter with STCKF.
414 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
415
417 // Assume by default that all vector operations need to be expanded.
418 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
419 if (getOperationAction(Opcode, VT) == Legal)
420 setOperationAction(Opcode, VT, Expand);
421
422 // Likewise all truncating stores and extending loads.
423 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
424 setTruncStoreAction(VT, InnerVT, Expand);
427 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
428 }
429
430 if (isTypeLegal(VT)) {
431 // These operations are legal for anything that can be stored in a
432 // vector register, even if there is no native support for the format
433 // as such. In particular, we can do these for v4f32 even though there
434 // are no specific instructions for that format.
435 setOperationAction(ISD::LOAD, VT, Legal);
436 setOperationAction(ISD::STORE, VT, Legal);
438 setOperationAction(ISD::BITCAST, VT, Legal);
440
441 // Likewise, except that we need to replace the nodes with something
442 // more specific.
445 }
446 }
447
448 // Handle integer vector types.
450 if (isTypeLegal(VT)) {
451 // These operations have direct equivalents.
456 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
460 }
461 if (Subtarget.hasVectorEnhancements3() &&
462 VT != MVT::v16i8 && VT != MVT::v8i16) {
467 }
472 if (Subtarget.hasVectorEnhancements1())
474 else
478
479 // Convert a GPR scalar to a vector by inserting it into element 0.
481
482 // Use a series of unpacks for extensions.
485
486 // Detect shifts/rotates by a scalar amount and convert them into
487 // V*_BY_SCALAR.
492
493 // Add ISD::VECREDUCE_ADD as custom in order to implement
494 // it with VZERO+VSUM
495 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
496
497 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
498 // and inverting the result as necessary.
500
502 Legal);
503 }
504 }
505
506 if (Subtarget.hasVector()) {
507 // There should be no need to check for float types other than v2f64
508 // since <2 x f32> isn't a legal type.
517
526 }
527
528 if (Subtarget.hasVectorEnhancements2()) {
537
546 }
547
548 // Handle floating-point types.
549 if (!useSoftFloat()) {
550 // Promote all f16 operations to float, with some exceptions below.
551 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
552 setOperationAction(Opc, MVT::f16, Promote);
554 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setTruncStoreAction(VT, MVT::f16, Expand);
557 }
558 for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE})
559 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
562 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
564 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
565 setOperationAction(Op, MVT::f16, Legal);
566 }
567
568 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
569 I <= MVT::LAST_FP_VALUETYPE;
570 ++I) {
572 if (isTypeLegal(VT) && VT != MVT::f16) {
573 // We can use FI for FRINT.
574 setOperationAction(ISD::FRINT, VT, Legal);
575
576 // We can use the extended form of FI for other rounding operations.
577 if (Subtarget.hasFPExtension()) {
578 setOperationAction(ISD::FNEARBYINT, VT, Legal);
579 setOperationAction(ISD::FFLOOR, VT, Legal);
580 setOperationAction(ISD::FCEIL, VT, Legal);
581 setOperationAction(ISD::FTRUNC, VT, Legal);
582 setOperationAction(ISD::FROUND, VT, Legal);
583 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
584 }
585
586 // No special instructions for these.
587 setOperationAction(ISD::FSIN, VT, Expand);
588 setOperationAction(ISD::FCOS, VT, Expand);
589 setOperationAction(ISD::FSINCOS, VT, Expand);
591 setOperationAction(ISD::FPOW, VT, Expand);
592
593 // Special treatment.
595
596 // Handle constrained floating-point operations.
605 if (Subtarget.hasFPExtension()) {
612 }
613
614 // Extension from f16 needs libcall.
615 setOperationAction(ISD::FP_EXTEND, VT, Custom);
617 }
618 }
619
620 // Handle floating-point vector types.
621 if (Subtarget.hasVector()) {
622 // Scalar-to-vector conversion is just a subreg.
625
626 // Some insertions and extractions can be done directly but others
627 // need to go via integers.
632
633 // These operations have direct equivalents.
634 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
635 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
636 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
637 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
638 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
639 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
640 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
641 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
642 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
643 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
644 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
645 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
646 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
647 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
648 setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Legal);
649
650 // Handle constrained floating-point operations.
664
669 if (Subtarget.hasVectorEnhancements1()) {
672 }
673 }
674
675 // The vector enhancements facility 1 has instructions for these.
676 if (Subtarget.hasVectorEnhancements1()) {
677 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
678 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
679 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
680 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
681 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
682 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
683 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
684 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
685 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
686 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
687 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
688 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
689 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
690 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
691 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
692
693 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
694 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
695 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
696 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
697
698 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
699 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
700 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
701 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
702
703 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
704 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
705 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
706 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
707
708 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
709 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
710 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
711 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
712
713 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
714 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
715 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
716 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
717
718 // Handle constrained floating-point operations.
732 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
733 MVT::v4f32, MVT::v2f64 }) {
738 }
739 }
740
741 // We only have fused f128 multiply-addition on vector registers.
742 if (!Subtarget.hasVectorEnhancements1()) {
745 }
746
747 // We don't have a copysign instruction on vector registers.
748 if (Subtarget.hasVectorEnhancements1())
750
751 // Needed so that we don't try to implement f128 constant loads using
752 // a load-and-extend of an f80 constant (in cases where the constant
753 // would fit in an f80).
754 for (MVT VT : MVT::fp_valuetypes())
755 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
756
757 // We don't have extending load instructions on vector registers.
758 if (Subtarget.hasVectorEnhancements1()) {
759 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
760 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
761 }
762
763 // Floating-point truncation and stores need to be done separately.
764 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
765 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
766 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
767
768 // We have 64-bit FPR<->GPR moves, but need special handling for
769 // 32-bit forms.
770 if (!Subtarget.hasVector()) {
771 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
772 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
773 }
774
775 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
776 // structure, but VAEND is a no-op.
777 setOperationAction(ISD::VASTART, MVT::Other, Custom);
778 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
779 setOperationAction(ISD::VAEND, MVT::Other, Expand);
780
781 if (Subtarget.isTargetzOS()) {
782 // Handle address space casts between mixed sized pointers.
783 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
784 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
785 }
786
788
789 // Codes for which we want to perform some z-specific combinations.
793 ISD::LOAD,
794 ISD::STORE,
799 ISD::FP_EXTEND,
806 ISD::SRL,
807 ISD::SRA,
808 ISD::MUL,
809 ISD::SDIV,
810 ISD::UDIV,
811 ISD::SREM,
812 ISD::UREM,
815
816 // Handle intrinsics.
819
820 // We're not using SJLJ for exception handling, but these nodes are
821 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
824
825 // We want to use MVC in preference to even a single load/store pair.
826 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
828
829 // The main memset sequence is a byte store followed by an MVC.
830 // Two STC or MV..I stores win over that, but the kind of fused stores
831 // generated by target-independent code don't when the byte value is
832 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
833 // than "STC;MVC". Handle the choice in target-specific code instead.
834 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
836
837 // Default to having -disable-strictnode-mutation on
838 IsStrictFPEnabled = true;
839}
840
842 return Subtarget.hasSoftFloat();
843}
844
846 LLVMContext &, EVT VT) const {
847 if (!VT.isVector())
848 return MVT::i32;
850}
851
853 const MachineFunction &MF, EVT VT) const {
854 if (useSoftFloat())
855 return false;
856
857 VT = VT.getScalarType();
858
859 if (!VT.isSimple())
860 return false;
861
862 switch (VT.getSimpleVT().SimpleTy) {
863 case MVT::f32:
864 case MVT::f64:
865 return true;
866 case MVT::f128:
867 return Subtarget.hasVectorEnhancements1();
868 default:
869 break;
870 }
871
872 return false;
873}
874
875// Return true if the constant can be generated with a vector instruction,
876// such as VGM, VGMB or VREPI.
878 const SystemZSubtarget &Subtarget) {
879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
880 if (!Subtarget.hasVector() ||
881 (isFP128 && !Subtarget.hasVectorEnhancements1()))
882 return false;
883
884 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
885 // preferred way of creating all-zero and all-one vectors so give it
886 // priority over other methods below.
887 unsigned Mask = 0;
888 unsigned I = 0;
889 for (; I < SystemZ::VectorBytes; ++I) {
890 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
891 if (Byte == 0xff)
892 Mask |= 1ULL << I;
893 else if (Byte != 0)
894 break;
895 }
896 if (I == SystemZ::VectorBytes) {
898 OpVals.push_back(Mask);
900 return true;
901 }
902
903 if (SplatBitSize > 64)
904 return false;
905
906 auto TryValue = [&](uint64_t Value) -> bool {
907 // Try VECTOR REPLICATE IMMEDIATE
908 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
909 if (isInt<16>(SignedValue)) {
910 OpVals.push_back(((unsigned) SignedValue));
913 SystemZ::VectorBits / SplatBitSize);
914 return true;
915 }
916 // Try VECTOR GENERATE MASK
917 unsigned Start, End;
918 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
919 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
920 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
921 // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
922 OpVals.push_back(Start - (64 - SplatBitSize));
923 OpVals.push_back(End - (64 - SplatBitSize));
926 SystemZ::VectorBits / SplatBitSize);
927 return true;
928 }
929 return false;
930 };
931
932 // First try assuming that any undefined bits above the highest set bit
933 // and below the lowest set bit are 1s. This increases the likelihood of
934 // being able to use a sign-extended element value in VECTOR REPLICATE
935 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
936 uint64_t SplatBitsZ = SplatBits.getZExtValue();
937 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
938 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
939 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
940 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
941 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
942 if (TryValue(SplatBitsZ | Upper | Lower))
943 return true;
944
945 // Now try assuming that any undefined bits between the first and
946 // last defined set bits are set. This increases the chances of
947 // using a non-wraparound mask.
948 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
949 return TryValue(SplatBitsZ | Middle);
950}
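// Worked example (illustrative, not from the original source): suppose the
// caller passes SplatBits = 0x0000FFF0, SplatUndef = 0xFFFF0000 and
// SplatBitSize = 32. Treating the undefined bits above the highest set bit
// as 1s turns the value into 0xFFFFFFF0, which sign-extends to -16 and so
// fits the signed 16-bit field of VECTOR REPLICATE IMMEDIATE; the first
// TryValue call therefore succeeds without needing VECTOR GENERATE MASK.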
951
953 if (IntImm.isSingleWord()) {
954 IntBits = APInt(128, IntImm.getZExtValue());
955 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
956 } else
957 IntBits = IntImm;
958 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
959
960 // Find the smallest splat.
961 SplatBits = IntImm;
962 unsigned Width = SplatBits.getBitWidth();
963 while (Width > 8) {
964 unsigned HalfSize = Width / 2;
965 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
966 APInt LowValue = SplatBits.trunc(HalfSize);
967
968 // If the two halves do not match, stop here.
969 if (HighValue != LowValue || 8 > HalfSize)
970 break;
971
972 SplatBits = HighValue;
973 Width = HalfSize;
974 }
975 SplatUndef = 0;
976 SplatBitSize = Width;
977}
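// Worked example (illustrative): for an IntImm whose 128 bits repeat the
// 16-bit pattern 0x0001, the loop above halves the width 128 -> 64 -> 32
// -> 16, since both halves stay equal at every step, and then stops
// because the two 8-bit halves of 0x0001 (0x00 and 0x01) differ. The
// result is SplatBits = 0x0001 with SplatBitSize = 16.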
978
980 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
981 bool HasAnyUndefs;
982
983 // Get IntBits by finding the 128 bit splat.
984 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
985 true);
986
987 // Get SplatBits by finding the 8 bit or greater splat.
988 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
989 true);
990}
991
993 bool ForCodeSize) const {
994 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
995 if (Imm.isZero() || Imm.isNegZero())
996 return true;
997
999}
1000
1003 MachineBasicBlock *MBB) const {
1004 DebugLoc DL = MI.getDebugLoc();
1005 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1006 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1007
1008 MachineFunction *MF = MBB->getParent();
1010
1011 const BasicBlock *BB = MBB->getBasicBlock();
1012 MachineFunction::iterator I = ++MBB->getIterator();
1013
1014 Register DstReg = MI.getOperand(0).getReg();
1015 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1016 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1017 (void)TRI;
1018 Register MainDstReg = MRI.createVirtualRegister(RC);
1019 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1020
1021 MVT PVT = getPointerTy(MF->getDataLayout());
1022 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1023 // For v = setjmp(buf), we generate the code below.
1024 // Algorithm:
1025 //
1026 // ---------
1027 // | thisMBB |
1028 // ---------
1029 // |
1030 // ------------------------
1031 // | |
1032 // ---------- ---------------
1033 // | mainMBB | | restoreMBB |
1034 // | v = 0 | | v = 1 |
1035 // ---------- ---------------
1036 // | |
1037 // -------------------------
1038 // |
1039 // -----------------------------
1040 // | sinkMBB |
1041 // | phi(v_mainMBB,v_restoreMBB) |
1042 // -----------------------------
1043 // thisMBB:
1044 // buf[FPOffset] = Frame Pointer if hasFP.
1045 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1046 // buf[BCOffset] = Backchain value if building with -mbackchain.
1047 // buf[SPOffset] = Stack Pointer.
1048 // buf[LPOffset] = never written by us; gcc's builtin_setjmp always stores R13 here.
1049 // SjLjSetup restoreMBB
1050 // mainMBB:
1051 // v_main = 0
1052 // sinkMBB:
1053 // v = phi(v_main, v_restore)
1054 // restoreMBB:
1055 // v_restore = 1
1056
1057 MachineBasicBlock *ThisMBB = MBB;
1058 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1059 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1060 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1061
1062 MF->insert(I, MainMBB);
1063 MF->insert(I, SinkMBB);
1064 MF->push_back(RestoreMBB);
1065 RestoreMBB->setMachineBlockAddressTaken();
1066
1068
1069 // Transfer the remainder of BB and its successor edges to sinkMBB.
1070 SinkMBB->splice(SinkMBB->begin(), MBB,
1071 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1073
1074 // thisMBB:
1075 const int64_t FPOffset = 0; // Slot 1.
1076 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1077 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1078 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1079
1080 // Buf address.
1081 Register BufReg = MI.getOperand(1).getReg();
1082
1083 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1084 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1085
1086 // Prepare IP for longjmp.
1087 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1088 .addMBB(RestoreMBB);
1089 // Store IP for return from jmp, slot 2, offset = 1.
1090 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1091 .addReg(LabelReg)
1092 .addReg(BufReg)
1093 .addImm(LabelOffset)
1094 .addReg(0);
1095
1096 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1097 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1098 if (HasFP) {
1099 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1100 .addReg(SpecialRegs->getFramePointerRegister())
1101 .addReg(BufReg)
1102 .addImm(FPOffset)
1103 .addReg(0);
1104 }
1105
1106 // Store SP.
1107 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1108 .addReg(SpecialRegs->getStackPointerRegister())
1109 .addReg(BufReg)
1110 .addImm(SPOffset)
1111 .addReg(0);
1112
1113 // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1114 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1115 if (BackChain) {
1116 Register BCReg = MRI.createVirtualRegister(PtrRC);
1117 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1118 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1119 .addReg(SpecialRegs->getStackPointerRegister())
1120 .addImm(TFL->getBackchainOffset(*MF))
1121 .addReg(0);
1122
1123 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1124 .addReg(BCReg)
1125 .addReg(BufReg)
1126 .addImm(BCOffset)
1127 .addReg(0);
1128 }
1129
1130 // Setup.
1131 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1132 .addMBB(RestoreMBB);
1133
1134 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1135 MIB.addRegMask(RegInfo->getNoPreservedMask());
1136
1137 ThisMBB->addSuccessor(MainMBB);
1138 ThisMBB->addSuccessor(RestoreMBB);
1139
1140 // mainMBB:
1141 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1142 MainMBB->addSuccessor(SinkMBB);
1143
1144 // sinkMBB:
1145 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1146 .addReg(MainDstReg)
1147 .addMBB(MainMBB)
1148 .addReg(RestoreDstReg)
1149 .addMBB(RestoreMBB);
1150
1151 // restoreMBB.
1152 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1153 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1154 RestoreMBB->addSuccessor(SinkMBB);
1155
1156 MI.eraseFromParent();
1157
1158 return SinkMBB;
1159}
1160
1163 MachineBasicBlock *MBB) const {
1164
1165 DebugLoc DL = MI.getDebugLoc();
1166 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1167
1168 MachineFunction *MF = MBB->getParent();
1170
1171 MVT PVT = getPointerTy(MF->getDataLayout());
1172 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1173 Register BufReg = MI.getOperand(0).getReg();
1174 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1175 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1176
1177 Register Tmp = MRI.createVirtualRegister(RC);
1178 Register BCReg = MRI.createVirtualRegister(RC);
1179
1181
1182 const int64_t FPOffset = 0;
1183 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1184 const int64_t BCOffset = 2 * PVT.getStoreSize();
1185 const int64_t SPOffset = 3 * PVT.getStoreSize();
1186 const int64_t LPOffset = 4 * PVT.getStoreSize();
1187
1188 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1189 .addReg(BufReg)
1190 .addImm(LabelOffset)
1191 .addReg(0);
1192
1193 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1194 SpecialRegs->getFramePointerRegister())
1195 .addReg(BufReg)
1196 .addImm(FPOffset)
1197 .addReg(0);
1198
1199 // We restore R13 even though we never stored it in setjmp from llvm,
1200 // because gcc's builtin_setjmp always stores R13. This keeps mixed code
1201 // (gcc setjmp paired with llvm longjmp) working.
1202 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1203 .addReg(BufReg)
1204 .addImm(LPOffset)
1205 .addReg(0);
1206
1207 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1208 if (BackChain) {
1209 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1210 .addReg(BufReg)
1211 .addImm(BCOffset)
1212 .addReg(0);
1213 }
1214
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1216 SpecialRegs->getStackPointerRegister())
1217 .addReg(BufReg)
1218 .addImm(SPOffset)
1219 .addReg(0);
1220
1221 if (BackChain) {
1222 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1223 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1224 .addReg(BCReg)
1225 .addReg(SpecialRegs->getStackPointerRegister())
1226 .addImm(TFL->getBackchainOffset(*MF))
1227 .addReg(0);
1228 }
1229
1230 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1231
1232 MI.eraseFromParent();
1233 return MBB;
1234}
1235
1236/// Returns true if stack probing through inline assembly is requested.
1238 // If the function specifically requests inline stack probes, emit them.
1239 if (MF.getFunction().hasFnAttribute("probe-stack"))
1240 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1241 "inline-asm";
1242 return false;
1243}
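// For example, an IR function declared with
//   attributes #0 = { "probe-stack"="inline-asm" }
// gets inline stack probes; any other value of "probe-stack" (or no
// attribute at all) keeps the default probing behaviour.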
1244
1249
1254
1257 // Don't expand subword operations as they require special treatment.
1258 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1260
1261 // Don't expand if there is a target instruction available.
1262 if (Subtarget.hasInterlockedAccess1() &&
1263 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1270
1272}
1273
1275 // We can use CGFI or CLGFI.
1276 return isInt<32>(Imm) || isUInt<32>(Imm);
1277}
1278
1280 // We can use ALGFI or SLGFI.
1281 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1282}
1283
1285 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1286 // Unaligned accesses should never be slower than the expanded version.
1287 // We check specifically for aligned accesses in the few cases where
1288 // they are required.
1289 if (Fast)
1290 *Fast = 1;
1291 return true;
1292}
1293
1295 EVT VT = Y.getValueType();
1296
1297 // We can use NC(G)RK for types in GPRs ...
1298 if (VT == MVT::i32 || VT == MVT::i64)
1299 return Subtarget.hasMiscellaneousExtensions3();
1300
1301 // ... or VNC for types in VRs.
1302 if (VT.isVector() || VT == MVT::i128)
1303 return Subtarget.hasVector();
1304
1305 return false;
1306}
1307
1308// Information about the addressing mode for a memory access.
1310 // True if a long displacement is supported.
1312
1313 // True if use of index register is supported.
1315
1316 AddressingMode(bool LongDispl, bool IdxReg) :
1317 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1318};
1319
1320// Return the desired addressing mode for a Load which has only one use (in
1321// the same block) which is a Store.
1323 Type *Ty) {
1324 // With vector support, a Load->Store combination may be combined into
1325 // either an MVC or vector operations, and it seems to work best to allow
1326 // the vector addressing mode.
1327 if (HasVector)
1328 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1329
1330 // Otherwise only the MVC case is special.
1331 bool MVC = Ty->isIntegerTy(8);
1332 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1333}
1334
1335// Return the addressing mode which seems most desirable given an LLVM
1336// Instruction pointer.
1337static AddressingMode
1340 switch (II->getIntrinsicID()) {
1341 default: break;
1342 case Intrinsic::memset:
1343 case Intrinsic::memmove:
1344 case Intrinsic::memcpy:
1345 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1346 }
1347 }
1348
1349 if (isa<LoadInst>(I) && I->hasOneUse()) {
1350 auto *SingleUser = cast<Instruction>(*I->user_begin());
1351 if (SingleUser->getParent() == I->getParent()) {
1352 if (isa<ICmpInst>(SingleUser)) {
1353 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1354 if (C->getBitWidth() <= 64 &&
1355 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1356 // Comparison of memory with 16 bit signed / unsigned immediate
1357 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1358 } else if (isa<StoreInst>(SingleUser))
1359 // Load->Store
1360 return getLoadStoreAddrMode(HasVector, I->getType());
1361 }
1362 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1363 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1364 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1365 // Load->Store
1366 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1367 }
1368
1369 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1370
1371 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1372 // dependencies (LDE only supports small offsets).
1373 // * Utilize the vector registers to hold floating point
1374 // values (vector load / store instructions only support small
1375 // offsets).
1376
1377 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1378 I->getOperand(0)->getType());
1379 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1380 bool IsVectorAccess = MemAccessTy->isVectorTy();
1381
1382 // A store of an extracted vector element will be combined into a VSTE type
1383 // instruction.
1384 if (!IsVectorAccess && isa<StoreInst>(I)) {
1385 Value *DataOp = I->getOperand(0);
1386 if (isa<ExtractElementInst>(DataOp))
1387 IsVectorAccess = true;
1388 }
1389
1390 // A load which gets inserted into a vector element will be combined into a
1391 // VLE type instruction.
1392 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1393 User *LoadUser = *I->user_begin();
1394 if (isa<InsertElementInst>(LoadUser))
1395 IsVectorAccess = true;
1396 }
1397
1398 if (IsFPAccess || IsVectorAccess)
1399 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1400 }
1401
1402 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1403}
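// Illustrative example (not from the original source): a byte copy such as
//   %v = load i8, ptr %src
//   store i8 %v, ptr %dst
// in a single block is expected to become an MVC, so on a subtarget without
// vector support the heuristic above returns
// AddressingMode(false/*LongDispl*/, false/*IdxReg*/); with vector support
// it returns AddressingMode(false, true), and plain GPR accesses fall
// through to the fully general AddressingMode(true, true).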
1404
1406 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1407 // Punt on globals for now, although they can be used in limited
1408 // RELATIVE LONG cases.
1409 if (AM.BaseGV)
1410 return false;
1411
1412 // Require a 20-bit signed offset.
1413 if (!isInt<20>(AM.BaseOffs))
1414 return false;
1415
1416 bool RequireD12 =
1417 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1418 AddressingMode SupportedAM(!RequireD12, true);
1419 if (I != nullptr)
1420 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1421
1422 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1423 return false;
1424
1425 if (!SupportedAM.IndexReg)
1426 // No indexing allowed.
1427 return AM.Scale == 0;
1428 else
1429 // Indexing is OK but no scale factor can be applied.
1430 return AM.Scale == 0 || AM.Scale == 1;
1431}
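// Illustrative example (not from the original source): on a subtarget with
// the vector facility, an access to a v4i32 value at base + 4096 is
// rejected because vector accesses require a 12-bit unsigned displacement
// and 4096 does not fit, whereas base + 4095 with an index register and
// Scale of 1 is accepted.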
1432
1434 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1435 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1436 const AttributeList &FuncAttributes) const {
1437 const int MVCFastLen = 16;
1438
1439 if (Limit != ~unsigned(0)) {
1440 // Don't expand Op into scalar loads/stores in these cases:
1441 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1442 return false; // Small memcpy: Use MVC
1443 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1444 return false; // Small memset (first byte with STC/MVI): Use MVC
1445 if (Op.isZeroMemset())
1446 return false; // Memset zero: Use XC
1447 }
1448
1449 return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
1450 DstAS, SrcAS, FuncAttributes);
1451}
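// Illustrative consequence of the checks above (not from the original
// source): a memcpy of at most MVCFastLen (16) bytes for which overlapping
// accesses are allowed is left to the target's MVC expansion instead of
// being split into scalar loads and stores; a memset of at most 17 bytes is
// likewise kept for the byte-store-plus-MVC sequence; and a memset of zero
// always stays whole so it can become an XC.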
1452
1454 LLVMContext &Context, const MemOp &Op,
1455 const AttributeList &FuncAttributes) const {
1456 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1457}
1458
1459bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1460 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1461 return false;
1462 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1463 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1464 return FromBits > ToBits;
1465}
1466
1468 if (!FromVT.isInteger() || !ToVT.isInteger())
1469 return false;
1470 unsigned FromBits = FromVT.getFixedSizeInBits();
1471 unsigned ToBits = ToVT.getFixedSizeInBits();
1472 return FromBits > ToBits;
1473}
1474
1475//===----------------------------------------------------------------------===//
1476// Inline asm support
1477//===----------------------------------------------------------------------===//
1478
1481 if (Constraint.size() == 1) {
1482 switch (Constraint[0]) {
1483 case 'a': // Address register
1484 case 'd': // Data register (equivalent to 'r')
1485 case 'f': // Floating-point register
1486 case 'h': // High-part register
1487 case 'r': // General-purpose register
1488 case 'v': // Vector register
1489 return C_RegisterClass;
1490
1491 case 'Q': // Memory with base and unsigned 12-bit displacement
1492 case 'R': // Likewise, plus an index
1493 case 'S': // Memory with base and signed 20-bit displacement
1494 case 'T': // Likewise, plus an index
1495 case 'm': // Equivalent to 'T'.
1496 return C_Memory;
1497
1498 case 'I': // Unsigned 8-bit constant
1499 case 'J': // Unsigned 12-bit constant
1500 case 'K': // Signed 16-bit constant
1501 case 'L': // Signed 20-bit displacement (on all targets we support)
1502 case 'M': // 0x7fffffff
1503 return C_Immediate;
1504
1505 default:
1506 break;
1507 }
1508 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1509 switch (Constraint[1]) {
1510 case 'Q': // Address with base and unsigned 12-bit displacement
1511 case 'R': // Likewise, plus an index
1512 case 'S': // Address with base and signed 20-bit displacement
1513 case 'T': // Likewise, plus an index
1514 return C_Address;
1515
1516 default:
1517 break;
1518 }
1519 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1520 if (StringRef("{@cc}").compare(Constraint) == 0)
1521 return C_Other;
1522 }
1523 return TargetLowering::getConstraintType(Constraint);
1524}
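// Illustrative use from C source (an assumption about typical usage, not
// taken from this file):
//   int r = x;
//   asm("ahi %0,%1" : "+d"(r) : "K"(100));
// Here 'd' requests a general-purpose register and 'K' a signed 16-bit
// constant, which the hook above classifies as C_RegisterClass and
// C_Immediate respectively.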
1525
1528 AsmOperandInfo &Info, const char *Constraint) const {
1530 Value *CallOperandVal = Info.CallOperandVal;
1531 // If we don't have a value, we can't do a match,
1532 // but allow it at the lowest weight.
1533 if (!CallOperandVal)
1534 return CW_Default;
1535 Type *type = CallOperandVal->getType();
1536 // Look at the constraint type.
1537 switch (*Constraint) {
1538 default:
1539 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1540 break;
1541
1542 case 'a': // Address register
1543 case 'd': // Data register (equivalent to 'r')
1544 case 'h': // High-part register
1545 case 'r': // General-purpose register
1546 Weight =
1547 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1548 break;
1549
1550 case 'f': // Floating-point register
1551 if (!useSoftFloat())
1552 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1553 break;
1554
1555 case 'v': // Vector register
1556 if (Subtarget.hasVector())
1557 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1558 : CW_Default;
1559 break;
1560
1561 case 'I': // Unsigned 8-bit constant
1562 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1563 if (isUInt<8>(C->getZExtValue()))
1564 Weight = CW_Constant;
1565 break;
1566
1567 case 'J': // Unsigned 12-bit constant
1568 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1569 if (isUInt<12>(C->getZExtValue()))
1570 Weight = CW_Constant;
1571 break;
1572
1573 case 'K': // Signed 16-bit constant
1574 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1575 if (isInt<16>(C->getSExtValue()))
1576 Weight = CW_Constant;
1577 break;
1578
1579 case 'L': // Signed 20-bit displacement (on all targets we support)
1580 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1581 if (isInt<20>(C->getSExtValue()))
1582 Weight = CW_Constant;
1583 break;
1584
1585 case 'M': // 0x7fffffff
1586 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1587 if (C->getZExtValue() == 0x7fffffff)
1588 Weight = CW_Constant;
1589 break;
1590 }
1591 return Weight;
1592}
1593
1594// Parse a "{tNNN}" register constraint for which the register type "t"
1595// has already been verified. MC is the class associated with "t" and
1596// Map maps 0-based register numbers to LLVM register numbers.
1597static std::pair<unsigned, const TargetRegisterClass *>
1599 const unsigned *Map, unsigned Size) {
1600 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1601 if (isdigit(Constraint[2])) {
1602 unsigned Index;
1603 bool Failed =
1604 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1605 if (!Failed && Index < Size && Map[Index])
1606 return std::make_pair(Map[Index], RC);
1607 }
1608 return std::make_pair(0U, nullptr);
1609}
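// Illustrative example (not from the original source): for the constraint
// "{r5}" with RC = &SystemZ::GR64BitRegClass and Map pointing at the 64-bit
// GPR number table (elided from this listing), the digits after "{r" parse
// as Index = 5 and the function returns {Map[5], RC}; a malformed or
// out-of-range constraint falls through to {0U, nullptr}.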
1610
1611std::pair<unsigned, const TargetRegisterClass *>
1613 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1614 if (Constraint.size() == 1) {
1615 // GCC Constraint Letters
1616 switch (Constraint[0]) {
1617 default: break;
1618 case 'd': // Data register (equivalent to 'r')
1619 case 'r': // General-purpose register
1620 if (VT.getSizeInBits() == 64)
1621 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1622 else if (VT.getSizeInBits() == 128)
1623 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1624 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1625
1626 case 'a': // Address register
1627 if (VT == MVT::i64)
1628 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1629 else if (VT == MVT::i128)
1630 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1631 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1632
1633 case 'h': // High-part register (an LLVM extension)
1634 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1635
1636 case 'f': // Floating-point register
1637 if (!useSoftFloat()) {
1638 if (VT.getSizeInBits() == 16)
1639 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1640 else if (VT.getSizeInBits() == 64)
1641 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1642 else if (VT.getSizeInBits() == 128)
1643 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1644 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1645 }
1646 break;
1647
1648 case 'v': // Vector register
1649 if (Subtarget.hasVector()) {
1650 if (VT.getSizeInBits() == 16)
1651 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1652 if (VT.getSizeInBits() == 32)
1653 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1654 if (VT.getSizeInBits() == 64)
1655 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1656 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1657 }
1658 break;
1659 }
1660 }
1661 if (Constraint.starts_with("{")) {
1662
1663 // A clobber constraint (e.g. ~{f0}) will have MVT::Other, whose size
1664 // cannot legally be queried, so treat it as size 0.
1665 auto getVTSizeInBits = [&VT]() {
1666 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1667 };
1668
1669 // We need to override the default register parsing for GPRs and FPRs
1670 // because the interpretation depends on VT. The internal names of
1671 // the registers are also different from the external names
1672 // (F0D and F0S instead of F0, etc.).
1673 if (Constraint[1] == 'r') {
1674 if (getVTSizeInBits() == 32)
1675 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1677 if (getVTSizeInBits() == 128)
1678 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1680 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1682 }
1683 if (Constraint[1] == 'f') {
1684 if (useSoftFloat())
1685 return std::make_pair(
1686 0u, static_cast<const TargetRegisterClass *>(nullptr));
1687 if (getVTSizeInBits() == 16)
1688 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1690 if (getVTSizeInBits() == 32)
1691 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1693 if (getVTSizeInBits() == 128)
1694 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1696 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1698 }
1699 if (Constraint[1] == 'v') {
1700 if (!Subtarget.hasVector())
1701 return std::make_pair(
1702 0u, static_cast<const TargetRegisterClass *>(nullptr));
1703 if (getVTSizeInBits() == 16)
1704 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1706 if (getVTSizeInBits() == 32)
1707 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1709 if (getVTSizeInBits() == 64)
1710 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1712 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1714 }
1715 if (Constraint[1] == '@') {
1716 if (StringRef("{@cc}").compare(Constraint) == 0)
1717 return std::make_pair(0u, &SystemZ::GR32BitRegClass);
1718 }
1719 }
1720 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1721}
1722
1723// FIXME? Maybe this could be a TableGen attribute on some registers and
1724// this table could be generated automatically from RegInfo.
1727 const MachineFunction &MF) const {
1728 Register Reg =
1730 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1731 : SystemZ::NoRegister)
1732 .Case("r15",
1733 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1734 .Default(Register());
1735
1736 return Reg;
1737}
1738
1740 const Constant *PersonalityFn) const {
1741 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1742}
1743
1745 const Constant *PersonalityFn) const {
1746 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1747}
1748
1749// Convert condition code in CCReg to an i32 value.
1751 SDLoc DL(CCReg);
1752 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1753 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1754 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1755}
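// For example, when CC is 2 the IPM node leaves the condition code in
// bits 29:28 of the i32 result (0x20000000), so the logical shift right by
// SystemZ::IPM_CC (28) above produces the plain integer value 2.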
1756
1757// Lower @cc targets via setcc.
1759 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1760 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1761 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1762 return SDValue();
1763
1764 // Check that return type is valid.
1765 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1766 OpInfo.ConstraintVT.getSizeInBits() < 8)
1767 report_fatal_error("Glue output operand is of invalid type");
1768
1771 MRI.addLiveIn(SystemZ::CC);
1772
1773 if (Glue.getNode()) {
1774 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1775 Chain = Glue.getValue(1);
1776 } else
1777 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1778 return getCCResult(DAG, Glue);
1779}
1780
1782 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1783 SelectionDAG &DAG) const {
1784 // Only support length 1 constraints for now.
1785 if (Constraint.size() == 1) {
1786 switch (Constraint[0]) {
1787 case 'I': // Unsigned 8-bit constant
1788 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1789 if (isUInt<8>(C->getZExtValue()))
1790 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1791 Op.getValueType()));
1792 return;
1793
1794 case 'J': // Unsigned 12-bit constant
1795 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1796 if (isUInt<12>(C->getZExtValue()))
1797 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1798 Op.getValueType()));
1799 return;
1800
1801 case 'K': // Signed 16-bit constant
1802 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1803 if (isInt<16>(C->getSExtValue()))
1804 Ops.push_back(DAG.getSignedTargetConstant(
1805 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1806 return;
1807
1808 case 'L': // Signed 20-bit displacement (on all targets we support)
1809 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1810 if (isInt<20>(C->getSExtValue()))
1811 Ops.push_back(DAG.getSignedTargetConstant(
1812 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1813 return;
1814
1815 case 'M': // 0x7fffffff
1816 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1817 if (C->getZExtValue() == 0x7fffffff)
1818 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1819 Op.getValueType()));
1820 return;
1821 }
1822 }
1824}
1825
1826//===----------------------------------------------------------------------===//
1827// Calling conventions
1828//===----------------------------------------------------------------------===//
1829
1830#include "SystemZGenCallingConv.inc"
1831
1833 CallingConv::ID) const {
1834 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1835 SystemZ::R14D, 0 };
1836 return ScratchRegs;
1837}
1838
1840 Type *ToType) const {
1841 return isTruncateFree(FromType, ToType);
1842}
1843
1845 return CI->isTailCall();
1846}
1847
1848// Value is a value that has been passed to us in the location described by VA
1849// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1850// any loads onto Chain.
1852 CCValAssign &VA, SDValue Chain,
1853 SDValue Value) {
1854 // If the argument has been promoted from a smaller type, insert an
1855 // assertion to capture this.
1856 if (VA.getLocInfo() == CCValAssign::SExt)
1858 DAG.getValueType(VA.getValVT()));
1859 else if (VA.getLocInfo() == CCValAssign::ZExt)
1861 DAG.getValueType(VA.getValVT()));
1862
1863 if (VA.isExtInLoc())
1864 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1865 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1866 // If this is a short vector argument loaded from the stack,
1867 // extend from i64 to full vector size and then bitcast.
1868 assert(VA.getLocVT() == MVT::i64);
1869 assert(VA.getValVT().isVector());
1870 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1871 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1872 } else
1873 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1874 return Value;
1875}
1876
1877// Value is a value of type VA.getValVT() that we need to copy into
1878// the location described by VA. Return a copy of Value converted to
1879// VA.getValVT(). The caller is responsible for handling indirect values.
1881 CCValAssign &VA, SDValue Value) {
1882 switch (VA.getLocInfo()) {
1883 case CCValAssign::SExt:
1884 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1885 case CCValAssign::ZExt:
1886 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1887 case CCValAssign::AExt:
1888 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1889 case CCValAssign::BCvt: {
1890 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1891 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1892 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1893 // For an f32 vararg we need to first promote it to an f64 and then
1894 // bitcast it to an i64.
1895 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1896 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1897 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1898 ? MVT::v2i64
1899 : VA.getLocVT();
1900 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1901 // For ELF, this is a short vector argument to be stored to the stack,
1902 // bitcast to v2i64 and then extract first element.
1903 if (BitCastToType == MVT::v2i64)
1904 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1905 DAG.getConstant(0, DL, MVT::i32));
1906 return Value;
1907 }
1908 case CCValAssign::Full:
1909 return Value;
1910 default:
1911 llvm_unreachable("Unhandled getLocInfo()");
1912 }
1913}
1914
1916 SDLoc DL(In);
1917 SDValue Lo, Hi;
1918 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1919 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1920 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1921 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1922 DAG.getConstant(64, DL, MVT::i32)));
1923 } else {
1924 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1925 }
1926
1927 // FIXME: If v2i64 were a legal type, we could use it instead of
1928 // Untyped here. This might enable improved folding.
1929 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1930 MVT::Untyped, Hi, Lo);
1931 return SDValue(Pair, 0);
1932}
1933
1935 SDLoc DL(In);
1936 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1937 DL, MVT::i64, In);
1938 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1939 DL, MVT::i64, In);
1940
1941 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1942 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1943 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1944 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1945 DAG.getConstant(64, DL, MVT::i32));
1946 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1947 } else {
1948 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1949 }
1950}
1951
1952 bool SystemZTargetLowering::splitValueIntoRegisterParts(
1953 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1954 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1955 EVT ValueVT = Val.getValueType();
1956 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1957 // Inline assembly operand.
1958 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1959 return true;
1960 }
1961
1962 return false;
1963}
1964
1965 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1966 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1967 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1968 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1969 // Inline assembly operand.
1970 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1971 return DAG.getBitcast(ValueVT, Res);
1972 }
1973
1974 return SDValue();
1975}
1976
1977 SDValue SystemZTargetLowering::LowerFormalArguments(
1978 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1979 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1980 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1981 MachineFunction &MF = DAG.getMachineFunction();
1982 MachineFrameInfo &MFI = MF.getFrameInfo();
1983 MachineRegisterInfo &MRI = MF.getRegInfo();
1984 SystemZMachineFunctionInfo *FuncInfo =
1985 MF.getInfo<SystemZMachineFunctionInfo>();
1986 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1987 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1988
1989 // Assign locations to all of the incoming arguments.
1990 SmallVector<CCValAssign, 16> ArgLocs;
1991 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1992 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1993 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1994
1995 unsigned NumFixedGPRs = 0;
1996 unsigned NumFixedFPRs = 0;
1997 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1998 SDValue ArgValue;
1999 CCValAssign &VA = ArgLocs[I];
2000 EVT LocVT = VA.getLocVT();
2001 if (VA.isRegLoc()) {
2002 // Arguments passed in registers
2003 const TargetRegisterClass *RC;
2004 switch (LocVT.getSimpleVT().SimpleTy) {
2005 default:
2006 // Integers smaller than i64 should be promoted to i64.
2007 llvm_unreachable("Unexpected argument type");
2008 case MVT::i32:
2009 NumFixedGPRs += 1;
2010 RC = &SystemZ::GR32BitRegClass;
2011 break;
2012 case MVT::i64:
2013 NumFixedGPRs += 1;
2014 RC = &SystemZ::GR64BitRegClass;
2015 break;
2016 case MVT::f16:
2017 NumFixedFPRs += 1;
2018 RC = &SystemZ::FP16BitRegClass;
2019 break;
2020 case MVT::f32:
2021 NumFixedFPRs += 1;
2022 RC = &SystemZ::FP32BitRegClass;
2023 break;
2024 case MVT::f64:
2025 NumFixedFPRs += 1;
2026 RC = &SystemZ::FP64BitRegClass;
2027 break;
2028 case MVT::f128:
2029 NumFixedFPRs += 2;
2030 RC = &SystemZ::FP128BitRegClass;
2031 break;
2032 case MVT::v16i8:
2033 case MVT::v8i16:
2034 case MVT::v4i32:
2035 case MVT::v2i64:
2036 case MVT::v4f32:
2037 case MVT::v2f64:
2038 RC = &SystemZ::VR128BitRegClass;
2039 break;
2040 }
2041
2042 Register VReg = MRI.createVirtualRegister(RC);
2043 MRI.addLiveIn(VA.getLocReg(), VReg);
2044 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2045 } else {
2046 assert(VA.isMemLoc() && "Argument not register or memory");
2047
2048 // Create the frame index object for this incoming parameter.
2049 // FIXME: Pre-include call frame size in the offset, should not
2050 // need to manually add it here.
2051 int64_t ArgSPOffset = VA.getLocMemOffset();
2052 if (Subtarget.isTargetXPLINK64()) {
2053 auto &XPRegs =
2054 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2055 ArgSPOffset += XPRegs.getCallFrameSize();
2056 }
2057 int FI =
2058 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2059
2060 // Create the SelectionDAG nodes corresponding to a load
2061 // from this parameter. Unpromoted ints and floats are
2062 // passed as right-justified 8-byte values.
2063 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2064 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2065 VA.getLocVT() == MVT::f16) {
2066 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2067 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2068 DAG.getIntPtrConstant(SlotOffs, DL));
2069 }
2070 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2071 MachinePointerInfo::getFixedStack(MF, FI));
2072 }
2073
2074 // Convert the value of the argument register into the value that's
2075 // being passed.
2076 if (VA.getLocInfo() == CCValAssign::Indirect) {
2077 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2078 MachinePointerInfo()));
2079 // If the original argument was split (e.g. i128), we need
2080 // to load all parts of it here (using the same address).
2081 unsigned ArgIndex = Ins[I].OrigArgIndex;
2082 assert (Ins[I].PartOffset == 0);
2083 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
2084 CCValAssign &PartVA = ArgLocs[I + 1];
2085 unsigned PartOffset = Ins[I + 1].PartOffset;
2086 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2087 DAG.getIntPtrConstant(PartOffset, DL));
2088 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2089 MachinePointerInfo()));
2090 ++I;
2091 }
2092 } else
2093 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2094 }
2095
2096 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2097 // Save the number of non-varargs registers for later use by va_start, etc.
2098 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2099 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2100
2101 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2102 Subtarget.getSpecialRegisters());
2103
2104 // Likewise the address (in the form of a frame index) of where the
2105 // first stack vararg would be. The 1-byte size here is arbitrary.
2106 // FIXME: Pre-include call frame size in the offset, should not
2107 // need to manually add it here.
2108 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2109 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2110 FuncInfo->setVarArgsFrameIndex(FI);
2111 }
2112
2113 if (IsVarArg && Subtarget.isTargetELF()) {
2114 // Save the number of non-varargs registers for later use by va_start, etc.
2115 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2116 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2117
2118 // Likewise the address (in the form of a frame index) of where the
2119 // first stack vararg would be. The 1-byte size here is arbitrary.
2120 int64_t VarArgsOffset = CCInfo.getStackSize();
2121 FuncInfo->setVarArgsFrameIndex(
2122 MFI.CreateFixedObject(1, VarArgsOffset, true));
2123
2124 // ...and a similar frame index for the caller-allocated save area
2125 // that will be used to store the incoming registers.
2126 int64_t RegSaveOffset =
2127 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2128 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2129 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2130
2131 // Store the FPR varargs in the reserved frame slots. (We store the
2132 // GPRs as part of the prologue.)
2133 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2134 SDValue MemOps[SystemZ::ELFNumArgFPRs];
2135 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2136 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2137 int FI =
2138 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
2139 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2140 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
2141 &SystemZ::FP64BitRegClass);
2142 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2143 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2144 MachinePointerInfo::getFixedStack(MF, FI));
2145 }
2146 // Join the stores, which are independent of one another.
2147 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2148 ArrayRef(&MemOps[NumFixedFPRs],
2149 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2150 }
2151 }
2152
2153 if (Subtarget.isTargetXPLINK64()) {
2154 // Create virtual register for handling incoming "ADA" special register (R5)
2155 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2156 Register ADAvReg = MRI.createVirtualRegister(RC);
2157 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2158 Subtarget.getSpecialRegisters());
2159 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2160 FuncInfo->setADAVirtualRegister(ADAvReg);
2161 }
2162 return Chain;
2163}
2164
2165 static bool canUseSiblingCall(const CCState &ArgCCInfo,
2166 SmallVectorImpl<CCValAssign> &ArgLocs,
2167 SmallVectorImpl<ISD::OutputArg> &Outs) {
2168 // Punt if there are any indirect or stack arguments, or if the call
2169 // needs the callee-saved argument register R6, or if the call uses
2170 // the callee-saved register arguments SwiftSelf and SwiftError.
2171 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2172 CCValAssign &VA = ArgLocs[I];
2173 if (VA.getLocInfo() == CCValAssign::Indirect)
2174 return false;
2175 if (!VA.isRegLoc())
2176 return false;
2177 Register Reg = VA.getLocReg();
2178 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2179 return false;
2180 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2181 return false;
2182 }
2183 return true;
2184}
2185
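// z/OS XPLINK helper: materialize an entry at the given Offset from the
// Associated Data Area (ADA), which is addressed through the function's ADA
// virtual register. When LoadAdr is true the computed address itself is
// returned instead of the loaded descriptor slot.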
2186 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2187 unsigned Offset, bool LoadAdr = false) {
2188 MachineFunction &MF = DAG.getMachineFunction();
2189 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2190 Register ADAvReg = MFI->getADAVirtualRegister();
2191 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2192
2193 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2194 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2195
2196 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2197 if (!LoadAdr)
2198 Result = DAG.getLoad(
2199 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2201
2202 return Result;
2203}
2204
2205// ADA access using Global value
2206// Note: for functions, address of descriptor is returned
2207 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2208 EVT PtrVT) {
2209 unsigned ADAtype;
2210 bool LoadAddr = false;
2211 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2212 bool IsFunction =
2213 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2214 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2215
2216 if (IsFunction) {
2217 if (IsInternal) {
2219 LoadAddr = true;
2220 } else
2222 } else {
2224 }
2225 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2226
2227 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2228}
2229
2230static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2231 SDLoc &DL, SDValue &Chain) {
2232 unsigned ADADelta = 0; // ADA offset in desc.
2233 unsigned EPADelta = 8; // EPA offset in desc.
2236
2237 // XPLink calling convention.
2238 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2239 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2240 G->getGlobal()->hasPrivateLinkage());
2241 if (IsInternal) {
2244 Register ADAvReg = MFI->getADAVirtualRegister();
2245 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2246 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2247 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2248 return true;
2249 } else {
2250 SDValue GA = DAG.getTargetGlobalAddress(
2251 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2252 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2253 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2254 }
2255 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2256 SDValue ES = DAG.getTargetExternalSymbol(
2257 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2258 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2259 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2260 } else {
2261 // Function pointer case
2262 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2263 DAG.getConstant(ADADelta, DL, PtrVT));
2264 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2266 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2267 DAG.getConstant(EPADelta, DL, PtrVT));
2268 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2270 }
2271 return false;
2272}
2273
2274SDValue
2275 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2276 SmallVectorImpl<SDValue> &InVals) const {
2277 SelectionDAG &DAG = CLI.DAG;
2278 SDLoc &DL = CLI.DL;
2279 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2280 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2281 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2282 SDValue Chain = CLI.Chain;
2283 SDValue Callee = CLI.Callee;
2284 bool &IsTailCall = CLI.IsTailCall;
2285 CallingConv::ID CallConv = CLI.CallConv;
2286 bool IsVarArg = CLI.IsVarArg;
2287 MachineFunction &MF = DAG.getMachineFunction();
2288 EVT PtrVT = getPointerTy(MF.getDataLayout());
2289 LLVMContext &Ctx = *DAG.getContext();
2290 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2291
2292 // FIXME: z/OS support to be added later.
2293 if (Subtarget.isTargetXPLINK64())
2294 IsTailCall = false;
2295
2296 // Integer args <=32 bits should have an extension attribute.
2297 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2298
2299 // Analyze the operands of the call, assigning locations to each operand.
2300 SmallVector<CCValAssign, 8> ArgLocs;
2301 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2302 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2303
2304 // We don't support GuaranteedTailCallOpt, only automatically-detected
2305 // sibling calls.
2306 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2307 IsTailCall = false;
2308
2309 // Get a count of how many bytes are to be pushed on the stack.
2310 unsigned NumBytes = ArgCCInfo.getStackSize();
2311
2312 // Mark the start of the call.
2313 if (!IsTailCall)
2314 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2315
2316 // Copy argument values to their designated locations.
2317 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2318 SmallVector<SDValue, 8> MemOpChains;
2319 SDValue StackPtr;
2320 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2321 CCValAssign &VA = ArgLocs[I];
2322 SDValue ArgValue = OutVals[I];
2323
2324 if (VA.getLocInfo() == CCValAssign::Indirect) {
2325 // Store the argument in a stack slot and pass its address.
2326 unsigned ArgIndex = Outs[I].OrigArgIndex;
2327 EVT SlotVT;
2328 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2329 // Allocate the full stack space for a promoted (and split) argument.
2330 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
2331 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
2332 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2333 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2334 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
2335 } else {
2336 SlotVT = Outs[I].VT;
2337 }
2338 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2339 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2340 MemOpChains.push_back(
2341 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2342 MachinePointerInfo::getFixedStack(MF, FI)));
2343 // If the original argument was split (e.g. i128), we need
2344 // to store all parts of it here (and pass just one address).
2345 assert (Outs[I].PartOffset == 0);
2346 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2347 SDValue PartValue = OutVals[I + 1];
2348 unsigned PartOffset = Outs[I + 1].PartOffset;
2349 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2350 DAG.getIntPtrConstant(PartOffset, DL));
2351 MemOpChains.push_back(
2352 DAG.getStore(Chain, DL, PartValue, Address,
2353 MachinePointerInfo::getFixedStack(MF, FI)));
2354 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2355 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2356 ++I;
2357 }
2358 ArgValue = SpillSlot;
2359 } else
2360 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2361
2362 if (VA.isRegLoc()) {
2363 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2364 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2365 // and low values.
2366 if (VA.getLocVT() == MVT::i128)
2367 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2368 // Queue up the argument copies and emit them at the end.
2369 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2370 } else {
2371 assert(VA.isMemLoc() && "Argument not register or memory");
2372
2373 // Work out the address of the stack slot. Unpromoted ints and
2374 // floats are passed as right-justified 8-byte values.
2375 if (!StackPtr.getNode())
2376 StackPtr = DAG.getCopyFromReg(Chain, DL,
2377 Regs->getStackPointerRegister(), PtrVT);
2378 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2379 VA.getLocMemOffset();
2380 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2381 Offset += 4;
2382 else if (VA.getLocVT() == MVT::f16)
2383 Offset += 6;
2384 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2385 DAG.getIntPtrConstant(Offset, DL));
2386
2387 // Emit the store.
2388 MemOpChains.push_back(
2389 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2390
2391 // Although long doubles or vectors are passed through the stack when
2392 // they are vararg (non-fixed arguments), if a long double or vector
2393 // occupies the third and fourth slot of the argument list GPR3 should
2394 // still shadow the third slot of the argument list.
2395 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2396 SDValue ShadowArgValue =
2397 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2398 DAG.getIntPtrConstant(1, DL));
2399 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2400 }
2401 }
2402 }
2403
2404 // Join the stores, which are independent of one another.
2405 if (!MemOpChains.empty())
2406 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2407
2408 // Accept direct calls by converting symbolic call addresses to the
2409 // associated Target* opcodes. Force %r1 to be used for indirect
2410 // tail calls.
2411 SDValue Glue;
2412
2413 if (Subtarget.isTargetXPLINK64()) {
2414 SDValue ADA;
2415 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2416 if (!IsBRASL) {
2417 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2418 ->getAddressOfCalleeRegister();
2419 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2420 Glue = Chain.getValue(1);
2421 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2422 }
2423 RegsToPass.push_back(std::make_pair(
2424 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2425 } else {
2426 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2427 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2428 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2429 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2430 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2431 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2432 } else if (IsTailCall) {
2433 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2434 Glue = Chain.getValue(1);
2435 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2436 }
2437 }
2438
2439 // Build a sequence of copy-to-reg nodes, chained and glued together.
2440 for (const auto &[Reg, N] : RegsToPass) {
2441 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2442 Glue = Chain.getValue(1);
2443 }
2444
2445 // The first call operand is the chain and the second is the target address.
2446 SmallVector<SDValue, 8> Ops;
2447 Ops.push_back(Chain);
2448 Ops.push_back(Callee);
2449
2450 // Add argument registers to the end of the list so that they are
2451 // known live into the call.
2452 for (const auto &[Reg, N] : RegsToPass)
2453 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2454
2455 // Add a register mask operand representing the call-preserved registers.
2456 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2457 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2458 assert(Mask && "Missing call preserved mask for calling convention");
2459 Ops.push_back(DAG.getRegisterMask(Mask));
2460
2461 // Glue the call to the argument copies, if any.
2462 if (Glue.getNode())
2463 Ops.push_back(Glue);
2464
2465 // Emit the call.
2466 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2467 if (IsTailCall) {
2468 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2469 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2470 return Ret;
2471 }
2472 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2473 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2474 Glue = Chain.getValue(1);
2475
2476 // Mark the end of the call, which is glued to the call itself.
2477 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2478 Glue = Chain.getValue(1);
2479
2480 // Assign locations to each value returned by this call.
2481 SmallVector<CCValAssign, 16> RetLocs;
2482 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2483 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2484
2485 // Copy all of the result registers out of their specified physreg.
2486 for (CCValAssign &VA : RetLocs) {
2487 // Copy the value out, gluing the copy to the end of the call sequence.
2488 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2489 VA.getLocVT(), Glue);
2490 Chain = RetValue.getValue(1);
2491 Glue = RetValue.getValue(2);
2492
2493 // Convert the value of the return register into the value that's
2494 // being returned.
2495 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2496 }
2497
2498 return Chain;
2499}
2500
2501// Generate a call taking the given operands as arguments and returning a
2502// result of type RetVT.
2503 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2504 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2505 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2506 bool DoesNotReturn, bool IsReturnValueUsed) const {
2507 TargetLowering::ArgListTy Args;
2508 Args.reserve(Ops.size());
2509
2510 for (SDValue Op : Ops) {
2512 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2513 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2514 Entry.IsZExt = !Entry.IsSExt;
2515 Args.push_back(Entry);
2516 }
2517
2518 SDValue Callee =
2519 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2520
2521 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2522 TargetLowering::CallLoweringInfo CLI(DAG);
2523 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2524 CLI.setDebugLoc(DL)
2525 .setChain(Chain)
2526 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2527 .setNoReturn(DoesNotReturn)
2528 .setDiscardResult(!IsReturnValueUsed)
2529 .setSExtResult(SignExtend)
2530 .setZExtResult(!SignExtend);
2531 return LowerCallTo(CLI);
2532}
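// Hypothetical usage sketch (the callee symbol and operands here are purely
// illustrative placeholders, not taken from this file):
//   SDValue Ops[] = {Lhs, Rhs};
//   auto [Result, OutChain] = makeExternalCall(
//       Chain, DAG, "__example_helper", MVT::i64, Ops, CallingConv::C,
//       /*IsSigned=*/true, DL, /*DoesNotReturn=*/false,
//       /*IsReturnValueUsed=*/true);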
2533
2534 bool SystemZTargetLowering::CanLowerReturn(
2535 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2536 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2537 const Type *RetTy) const {
2538 // Special case that we cannot easily detect in RetCC_SystemZ since
2539 // i128 may not be a legal type.
2540 for (auto &Out : Outs)
2541 if (Out.ArgVT == MVT::i128)
2542 return false;
2543
2544 SmallVector<CCValAssign, 16> RetLocs;
2545 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2546 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2547}
2548
2549SDValue
2550 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2551 bool IsVarArg,
2552 const SmallVectorImpl<ISD::OutputArg> &Outs,
2553 const SmallVectorImpl<SDValue> &OutVals,
2554 const SDLoc &DL, SelectionDAG &DAG) const {
2555 MachineFunction &MF = DAG.getMachineFunction();
2556
2557 // Integer args <=32 bits should have an extension attribute.
2558 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2559
2560 // Assign locations to each returned value.
2561 SmallVector<CCValAssign, 16> RetLocs;
2562 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2563 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2564
2565 // Quick exit for void returns
2566 if (RetLocs.empty())
2567 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2568
2569 if (CallConv == CallingConv::GHC)
2570 report_fatal_error("GHC functions return void only");
2571
2572 // Copy the result values into the output registers.
2573 SDValue Glue;
2574 SmallVector<SDValue, 4> RetOps;
2575 RetOps.push_back(Chain);
2576 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2577 CCValAssign &VA = RetLocs[I];
2578 SDValue RetValue = OutVals[I];
2579
2580 // Make the return register live on exit.
2581 assert(VA.isRegLoc() && "Can only return in registers!");
2582
2583 // Promote the value as required.
2584 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2585
2586 // Chain and glue the copies together.
2587 Register Reg = VA.getLocReg();
2588 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2589 Glue = Chain.getValue(1);
2590 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2591 }
2592
2593 // Update chain and glue.
2594 RetOps[0] = Chain;
2595 if (Glue.getNode())
2596 RetOps.push_back(Glue);
2597
2598 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2599}
2600
2601// Return true if Op is an intrinsic node with chain that returns the CC value
2602// as its only (other) argument. Provide the associated SystemZISD opcode and
2603// the mask of valid CC values if so.
2604static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2605 unsigned &CCValid) {
2606 unsigned Id = Op.getConstantOperandVal(1);
2607 switch (Id) {
2608 case Intrinsic::s390_tbegin:
2609 Opcode = SystemZISD::TBEGIN;
2610 CCValid = SystemZ::CCMASK_TBEGIN;
2611 return true;
2612
2613 case Intrinsic::s390_tbegin_nofloat:
2614 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2615 CCValid = SystemZ::CCMASK_TBEGIN;
2616 return true;
2617
2618 case Intrinsic::s390_tend:
2619 Opcode = SystemZISD::TEND;
2620 CCValid = SystemZ::CCMASK_TEND;
2621 return true;
2622
2623 default:
2624 return false;
2625 }
2626}
2627
2628// Return true if Op is an intrinsic node without chain that returns the
2629// CC value as its final argument. Provide the associated SystemZISD
2630// opcode and the mask of valid CC values if so.
2631static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2632 unsigned Id = Op.getConstantOperandVal(0);
2633 switch (Id) {
2634 case Intrinsic::s390_vpkshs:
2635 case Intrinsic::s390_vpksfs:
2636 case Intrinsic::s390_vpksgs:
2637 Opcode = SystemZISD::PACKS_CC;
2638 CCValid = SystemZ::CCMASK_VCMP;
2639 return true;
2640
2641 case Intrinsic::s390_vpklshs:
2642 case Intrinsic::s390_vpklsfs:
2643 case Intrinsic::s390_vpklsgs:
2644 Opcode = SystemZISD::PACKLS_CC;
2645 CCValid = SystemZ::CCMASK_VCMP;
2646 return true;
2647
2648 case Intrinsic::s390_vceqbs:
2649 case Intrinsic::s390_vceqhs:
2650 case Intrinsic::s390_vceqfs:
2651 case Intrinsic::s390_vceqgs:
2652 case Intrinsic::s390_vceqqs:
2653 Opcode = SystemZISD::VICMPES;
2654 CCValid = SystemZ::CCMASK_VCMP;
2655 return true;
2656
2657 case Intrinsic::s390_vchbs:
2658 case Intrinsic::s390_vchhs:
2659 case Intrinsic::s390_vchfs:
2660 case Intrinsic::s390_vchgs:
2661 case Intrinsic::s390_vchqs:
2662 Opcode = SystemZISD::VICMPHS;
2663 CCValid = SystemZ::CCMASK_VCMP;
2664 return true;
2665
2666 case Intrinsic::s390_vchlbs:
2667 case Intrinsic::s390_vchlhs:
2668 case Intrinsic::s390_vchlfs:
2669 case Intrinsic::s390_vchlgs:
2670 case Intrinsic::s390_vchlqs:
2671 Opcode = SystemZISD::VICMPHLS;
2672 CCValid = SystemZ::CCMASK_VCMP;
2673 return true;
2674
2675 case Intrinsic::s390_vtm:
2676 Opcode = SystemZISD::VTM;
2677 CCValid = SystemZ::CCMASK_VCMP;
2678 return true;
2679
2680 case Intrinsic::s390_vfaebs:
2681 case Intrinsic::s390_vfaehs:
2682 case Intrinsic::s390_vfaefs:
2683 Opcode = SystemZISD::VFAE_CC;
2684 CCValid = SystemZ::CCMASK_ANY;
2685 return true;
2686
2687 case Intrinsic::s390_vfaezbs:
2688 case Intrinsic::s390_vfaezhs:
2689 case Intrinsic::s390_vfaezfs:
2690 Opcode = SystemZISD::VFAEZ_CC;
2691 CCValid = SystemZ::CCMASK_ANY;
2692 return true;
2693
2694 case Intrinsic::s390_vfeebs:
2695 case Intrinsic::s390_vfeehs:
2696 case Intrinsic::s390_vfeefs:
2697 Opcode = SystemZISD::VFEE_CC;
2698 CCValid = SystemZ::CCMASK_ANY;
2699 return true;
2700
2701 case Intrinsic::s390_vfeezbs:
2702 case Intrinsic::s390_vfeezhs:
2703 case Intrinsic::s390_vfeezfs:
2704 Opcode = SystemZISD::VFEEZ_CC;
2705 CCValid = SystemZ::CCMASK_ANY;
2706 return true;
2707
2708 case Intrinsic::s390_vfenebs:
2709 case Intrinsic::s390_vfenehs:
2710 case Intrinsic::s390_vfenefs:
2711 Opcode = SystemZISD::VFENE_CC;
2712 CCValid = SystemZ::CCMASK_ANY;
2713 return true;
2714
2715 case Intrinsic::s390_vfenezbs:
2716 case Intrinsic::s390_vfenezhs:
2717 case Intrinsic::s390_vfenezfs:
2718 Opcode = SystemZISD::VFENEZ_CC;
2719 CCValid = SystemZ::CCMASK_ANY;
2720 return true;
2721
2722 case Intrinsic::s390_vistrbs:
2723 case Intrinsic::s390_vistrhs:
2724 case Intrinsic::s390_vistrfs:
2725 Opcode = SystemZISD::VISTR_CC;
2727 return true;
2728
2729 case Intrinsic::s390_vstrcbs:
2730 case Intrinsic::s390_vstrchs:
2731 case Intrinsic::s390_vstrcfs:
2732 Opcode = SystemZISD::VSTRC_CC;
2733 CCValid = SystemZ::CCMASK_ANY;
2734 return true;
2735
2736 case Intrinsic::s390_vstrczbs:
2737 case Intrinsic::s390_vstrczhs:
2738 case Intrinsic::s390_vstrczfs:
2739 Opcode = SystemZISD::VSTRCZ_CC;
2740 CCValid = SystemZ::CCMASK_ANY;
2741 return true;
2742
2743 case Intrinsic::s390_vstrsb:
2744 case Intrinsic::s390_vstrsh:
2745 case Intrinsic::s390_vstrsf:
2746 Opcode = SystemZISD::VSTRS_CC;
2747 CCValid = SystemZ::CCMASK_ANY;
2748 return true;
2749
2750 case Intrinsic::s390_vstrszb:
2751 case Intrinsic::s390_vstrszh:
2752 case Intrinsic::s390_vstrszf:
2753 Opcode = SystemZISD::VSTRSZ_CC;
2754 CCValid = SystemZ::CCMASK_ANY;
2755 return true;
2756
2757 case Intrinsic::s390_vfcedbs:
2758 case Intrinsic::s390_vfcesbs:
2759 Opcode = SystemZISD::VFCMPES;
2760 CCValid = SystemZ::CCMASK_VCMP;
2761 return true;
2762
2763 case Intrinsic::s390_vfchdbs:
2764 case Intrinsic::s390_vfchsbs:
2765 Opcode = SystemZISD::VFCMPHS;
2766 CCValid = SystemZ::CCMASK_VCMP;
2767 return true;
2768
2769 case Intrinsic::s390_vfchedbs:
2770 case Intrinsic::s390_vfchesbs:
2771 Opcode = SystemZISD::VFCMPHES;
2772 CCValid = SystemZ::CCMASK_VCMP;
2773 return true;
2774
2775 case Intrinsic::s390_vftcidb:
2776 case Intrinsic::s390_vftcisb:
2777 Opcode = SystemZISD::VFTCI;
2778 CCValid = SystemZ::CCMASK_VCMP;
2779 return true;
2780
2781 case Intrinsic::s390_tdc:
2782 Opcode = SystemZISD::TDC;
2783 CCValid = SystemZ::CCMASK_TDC;
2784 return true;
2785
2786 default:
2787 return false;
2788 }
2789}
2790
2791// Emit an intrinsic with chain and an explicit CC register result.
2792 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2793 unsigned Opcode) {
2794 // Copy all operands except the intrinsic ID.
2795 unsigned NumOps = Op.getNumOperands();
2796 SmallVector<SDValue, 6> Ops;
2797 Ops.reserve(NumOps - 1);
2798 Ops.push_back(Op.getOperand(0));
2799 for (unsigned I = 2; I < NumOps; ++I)
2800 Ops.push_back(Op.getOperand(I));
2801
2802 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2803 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2804 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2805 SDValue OldChain = SDValue(Op.getNode(), 1);
2806 SDValue NewChain = SDValue(Intr.getNode(), 1);
2807 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2808 return Intr.getNode();
2809}
2810
2811// Emit an intrinsic with an explicit CC register result.
2812 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2813 unsigned Opcode) {
2814 // Copy all operands except the intrinsic ID.
2815 SDLoc DL(Op);
2816 unsigned NumOps = Op.getNumOperands();
2817 SmallVector<SDValue, 6> Ops;
2818 Ops.reserve(NumOps - 1);
2819 for (unsigned I = 1; I < NumOps; ++I) {
2820 SDValue CurrOper = Op.getOperand(I);
2821 if (CurrOper.getValueType() == MVT::f16) {
2822 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2823 "Unhandled intrinsic with f16 operand.");
2824 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2825 }
2826 Ops.push_back(CurrOper);
2827 }
2828
2829 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2830 return Intr.getNode();
2831}
2832
2833// CC is a comparison that will be implemented using an integer or
2834// floating-point comparison. Return the condition code mask for
2835// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2836// unsigned comparisons and clear for signed ones. In the floating-point
2837// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
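// For example, ISD::SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT; the UO bit
// carries its unordered meaning for floating point and is cleared again for
// integer comparisons in getCmp() below.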
2838 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2839#define CONV(X) \
2840 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2841 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2842 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2843
2844 switch (CC) {
2845 default:
2846 llvm_unreachable("Invalid integer condition!");
2847
2848 CONV(EQ);
2849 CONV(NE);
2850 CONV(GT);
2851 CONV(GE);
2852 CONV(LT);
2853 CONV(LE);
2854
2855 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2856 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2857 }
2858#undef CONV
2859}
2860
2861// If C can be converted to a comparison against zero, adjust the operands
2862// as necessary.
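// For example, a signed test "x > -1" (CCMASK_CMP_GT against -1) is rewritten
// as "x >= 0" by flipping the EQ bit and replacing the constant with zero, so
// later code can use comparisons against zero.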
2863static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2864 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2865 return;
2866
2867 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2868 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2869 return;
2870
2871 int64_t Value = ConstOp1->getSExtValue();
2872 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2873 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2874 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2875 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2876 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2877 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2878 }
2879}
2880
2881// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2882// adjust the operands as necessary.
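// For example, an equality test of a sign-extending i8 load against -1 is
// rewritten as an unsigned 8-bit comparison against 255, which matches the
// immediate form of CLI.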
2883static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2884 Comparison &C) {
2885 // For us to make any changes, it must be a comparison between a single-use
2886 // load and a constant.
2887 if (!C.Op0.hasOneUse() ||
2888 C.Op0.getOpcode() != ISD::LOAD ||
2889 C.Op1.getOpcode() != ISD::Constant)
2890 return;
2891
2892 // We must have an 8- or 16-bit load.
2893 auto *Load = cast<LoadSDNode>(C.Op0);
2894 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2895 if ((NumBits != 8 && NumBits != 16) ||
2896 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2897 return;
2898
2899 // The load must be an extending one and the constant must be within the
2900 // range of the unextended value.
2901 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2902 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2903 return;
2904 uint64_t Value = ConstOp1->getZExtValue();
2905 uint64_t Mask = (1 << NumBits) - 1;
2906 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2907 // Make sure that ConstOp1 is in range of C.Op0.
2908 int64_t SignedValue = ConstOp1->getSExtValue();
2909 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2910 return;
2911 if (C.ICmpType != SystemZICMP::SignedOnly) {
2912 // Unsigned comparison between two sign-extended values is equivalent
2913 // to unsigned comparison between two zero-extended values.
2914 Value &= Mask;
2915 } else if (NumBits == 8) {
2916 // Try to treat the comparison as unsigned, so that we can use CLI.
2917 // Adjust CCMask and Value as necessary.
2918 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2919 // Test whether the high bit of the byte is set.
2920 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2921 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2922 // Test whether the high bit of the byte is clear.
2923 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2924 else
2925 // No instruction exists for this combination.
2926 return;
2927 C.ICmpType = SystemZICMP::UnsignedOnly;
2928 }
2929 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2930 if (Value > Mask)
2931 return;
2932 // If the constant is in range, we can use any comparison.
2933 C.ICmpType = SystemZICMP::Any;
2934 } else
2935 return;
2936
2937 // Make sure that the first operand is an i32 of the right extension type.
2938 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2939 ISD::SEXTLOAD :
2940 ISD::ZEXTLOAD);
2941 if (C.Op0.getValueType() != MVT::i32 ||
2942 Load->getExtensionType() != ExtType) {
2943 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2944 Load->getBasePtr(), Load->getPointerInfo(),
2945 Load->getMemoryVT(), Load->getAlign(),
2946 Load->getMemOperand()->getFlags());
2947 // Update the chain uses.
2948 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2949 }
2950
2951 // Make sure that the second operand is an i32 with the right value.
2952 if (C.Op1.getValueType() != MVT::i32 ||
2953 Value != ConstOp1->getZExtValue())
2954 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2955}
2956
2957// Return true if Op is either an unextended load, or a load suitable
2958// for integer register-memory comparisons of type ICmpType.
2959static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2960 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2961 if (Load) {
2962 // There are no instructions to compare a register with a memory byte.
2963 if (Load->getMemoryVT() == MVT::i8)
2964 return false;
2965 // Otherwise decide on extension type.
2966 switch (Load->getExtensionType()) {
2967 case ISD::NON_EXTLOAD:
2968 return true;
2969 case ISD::SEXTLOAD:
2970 return ICmpType != SystemZICMP::UnsignedOnly;
2971 case ISD::ZEXTLOAD:
2972 return ICmpType != SystemZICMP::SignedOnly;
2973 default:
2974 break;
2975 }
2976 }
2977 return false;
2978}
2979
2980// Return true if it is better to swap the operands of C.
2981static bool shouldSwapCmpOperands(const Comparison &C) {
2982 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2983 if (C.Op0.getValueType() == MVT::i128)
2984 return false;
2985 if (C.Op0.getValueType() == MVT::f128)
2986 return false;
2987
2988 // Always keep a floating-point constant second, since comparisons with
2989 // zero can use LOAD TEST and comparisons with other constants make a
2990 // natural memory operand.
2991 if (isa<ConstantFPSDNode>(C.Op1))
2992 return false;
2993
2994 // Never swap comparisons with zero since there are many ways to optimize
2995 // those later.
2996 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2997 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2998 return false;
2999
3000 // Also keep natural memory operands second if the loaded value is
3001 // only used here. Several comparisons have memory forms.
3002 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3003 return false;
3004
3005 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3006 // In that case we generally prefer the memory to be second.
3007 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3008 // The only exceptions are when the second operand is a constant and
3009 // we can use things like CHHSI.
3010 if (!ConstOp1)
3011 return true;
3012 // The unsigned memory-immediate instructions can handle 16-bit
3013 // unsigned integers.
3014 if (C.ICmpType != SystemZICMP::SignedOnly &&
3015 isUInt<16>(ConstOp1->getZExtValue()))
3016 return false;
3017 // The signed memory-immediate instructions can handle 16-bit
3018 // signed integers.
3019 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3020 isInt<16>(ConstOp1->getSExtValue()))
3021 return false;
3022 return true;
3023 }
3024
3025 // Try to promote the use of CGFR and CLGFR.
3026 unsigned Opcode0 = C.Op0.getOpcode();
3027 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3028 return true;
3029 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3030 return true;
3031 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3032 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3033 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3034 return true;
3035
3036 return false;
3037}
3038
3039// Check whether C tests for equality between X and Y and whether X - Y
3040// or Y - X is also computed. In that case it's better to compare the
3041// result of the subtraction against zero.
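// For example, if the program computes both (a - b) and (a == b), the
// equality test is redirected to compare (a - b) against zero so that the
// subtraction itself can set CC.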
3042 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
3043 Comparison &C) {
3044 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3045 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3046 for (SDNode *N : C.Op0->users()) {
3047 if (N->getOpcode() == ISD::SUB &&
3048 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3049 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3050 // Disable the nsw and nuw flags: the backend needs to handle
3051 // overflow as well during comparison elimination.
3052 N->dropFlags(SDNodeFlags::NoWrap);
3053 C.Op0 = SDValue(N, 0);
3054 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3055 return;
3056 }
3057 }
3058 }
3059}
3060
3061// Check whether C compares a floating-point value with zero and if that
3062// floating-point value is also negated. In this case we can use the
3063// negation to set CC, so avoiding separate LOAD AND TEST and
3064// LOAD (NEGATIVE/COMPLEMENT) instructions.
3065static void adjustForFNeg(Comparison &C) {
3066 // This optimization is invalid for strict comparisons, since FNEG
3067 // does not raise any exceptions.
3068 if (C.Chain)
3069 return;
3070 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3071 if (C1 && C1->isZero()) {
3072 for (SDNode *N : C.Op0->users()) {
3073 if (N->getOpcode() == ISD::FNEG) {
3074 C.Op0 = SDValue(N, 0);
3075 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3076 return;
3077 }
3078 }
3079 }
3080}
3081
3082// Check whether C compares (shl X, 32) with 0 and whether X is
3083// also sign-extended. In that case it is better to test the result
3084// of the sign extension using LTGFR.
3085//
3086// This case is important because InstCombine transforms a comparison
3087// with (sext (trunc X)) into a comparison with (shl X, 32).
3088static void adjustForLTGFR(Comparison &C) {
3089 // Check for a comparison between (shl X, 32) and 0.
3090 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3091 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3092 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3093 if (C1 && C1->getZExtValue() == 32) {
3094 SDValue ShlOp0 = C.Op0.getOperand(0);
3095 // See whether X has any SIGN_EXTEND_INREG uses.
3096 for (SDNode *N : ShlOp0->users()) {
3097 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3098 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3099 C.Op0 = SDValue(N, 0);
3100 return;
3101 }
3102 }
3103 }
3104 }
3105}
3106
3107// If C compares the truncation of an extending load, try to compare
3108// the untruncated value instead. This exposes more opportunities to
3109// reuse CC.
3110static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3111 Comparison &C) {
3112 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3113 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3114 C.Op1.getOpcode() == ISD::Constant &&
3115 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3116 C.Op1->getAsZExtVal() == 0) {
3117 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3118 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3119 C.Op0.getValueSizeInBits().getFixedValue()) {
3120 unsigned Type = L->getExtensionType();
3121 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3122 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3123 C.Op0 = C.Op0.getOperand(0);
3124 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3125 }
3126 }
3127 }
3128}
3129
3130// Return true if shift operation N has an in-range constant shift value.
3131// Store it in ShiftVal if so.
3132static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3133 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3134 if (!Shift)
3135 return false;
3136
3137 uint64_t Amount = Shift->getZExtValue();
3138 if (Amount >= N.getValueSizeInBits())
3139 return false;
3140
3141 ShiftVal = Amount;
3142 return true;
3143}
3144
3145// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3146// instruction and whether the CC value is descriptive enough to handle
3147// a comparison of type Opcode between the AND result and CmpVal.
3148// CCMask says which comparison result is being tested and BitSize is
3149// the number of bits in the operands. If TEST UNDER MASK can be used,
3150// return the corresponding CC mask, otherwise return 0.
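// For example, with Mask 0x8000, CmpVal 0 and CCMASK_CMP_EQ, the AND can be
// replaced by a TEST UNDER MASK of that single bit, and the returned CC mask
// selects the "all masked bits zero" outcome.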
3151static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3152 uint64_t Mask, uint64_t CmpVal,
3153 unsigned ICmpType) {
3154 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3155
3156 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3157 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3158 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3159 return 0;
3160
3161 // Work out the masks for the lowest and highest bits.
3162 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
3163 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3164
3165 // Signed ordered comparisons are effectively unsigned if the sign
3166 // bit is dropped.
3167 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3168
3169 // Check for equality comparisons with 0, or the equivalent.
3170 if (CmpVal == 0) {
3171 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3173 if (CCMask == SystemZ::CCMASK_CMP_NE)
3175 }
3176 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3177 if (CCMask == SystemZ::CCMASK_CMP_LT)
3179 if (CCMask == SystemZ::CCMASK_CMP_GE)
3181 }
3182 if (EffectivelyUnsigned && CmpVal < Low) {
3183 if (CCMask == SystemZ::CCMASK_CMP_LE)
3185 if (CCMask == SystemZ::CCMASK_CMP_GT)
3187 }
3188
3189 // Check for equality comparisons with the mask, or the equivalent.
3190 if (CmpVal == Mask) {
3191 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3193 if (CCMask == SystemZ::CCMASK_CMP_NE)
3195 }
3196 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3197 if (CCMask == SystemZ::CCMASK_CMP_GT)
3199 if (CCMask == SystemZ::CCMASK_CMP_LE)
3201 }
3202 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3203 if (CCMask == SystemZ::CCMASK_CMP_GE)
3205 if (CCMask == SystemZ::CCMASK_CMP_LT)
3207 }
3208
3209 // Check for ordered comparisons with the top bit.
3210 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3211 if (CCMask == SystemZ::CCMASK_CMP_LE)
3213 if (CCMask == SystemZ::CCMASK_CMP_GT)
3215 }
3216 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3217 if (CCMask == SystemZ::CCMASK_CMP_LT)
3219 if (CCMask == SystemZ::CCMASK_CMP_GE)
3221 }
3222
3223 // If there are just two bits, we can do equality checks for Low and High
3224 // as well.
3225 if (Mask == Low + High) {
3226 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3228 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3230 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3232 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3234 }
3235
3236 // Looks like we've exhausted our options.
3237 return 0;
3238}
3239
3240// See whether C can be implemented as a TEST UNDER MASK instruction.
3241// Update the arguments with the TM version if so.
3242 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3243 Comparison &C) {
3244 // Use VECTOR TEST UNDER MASK for i128 operations.
3245 if (C.Op0.getValueType() == MVT::i128) {
3246 // We can use VTM for EQ/NE comparisons of x & y against 0.
3247 if (C.Op0.getOpcode() == ISD::AND &&
3248 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3249 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3250 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3251 if (Mask && Mask->getAPIntValue() == 0) {
3252 C.Opcode = SystemZISD::VTM;
3253 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3254 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3255 C.CCValid = SystemZ::CCMASK_VCMP;
3256 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3257 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3258 else
3259 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3260 }
3261 }
3262 return;
3263 }
3264
3265 // Check that we have a comparison with a constant.
3266 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3267 if (!ConstOp1)
3268 return;
3269 uint64_t CmpVal = ConstOp1->getZExtValue();
3270
3271 // Check whether the nonconstant input is an AND with a constant mask.
3272 Comparison NewC(C);
3273 uint64_t MaskVal;
3274 ConstantSDNode *Mask = nullptr;
3275 if (C.Op0.getOpcode() == ISD::AND) {
3276 NewC.Op0 = C.Op0.getOperand(0);
3277 NewC.Op1 = C.Op0.getOperand(1);
3278 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3279 if (!Mask)
3280 return;
3281 MaskVal = Mask->getZExtValue();
3282 } else {
3283 // There is no instruction to compare with a 64-bit immediate
3284 // so use TMHH instead if possible. We need an unsigned ordered
3285 // comparison with an i64 immediate.
3286 if (NewC.Op0.getValueType() != MVT::i64 ||
3287 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3288 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3289 NewC.ICmpType == SystemZICMP::SignedOnly)
3290 return;
3291 // Convert LE and GT comparisons into LT and GE.
3292 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3293 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3294 if (CmpVal == uint64_t(-1))
3295 return;
3296 CmpVal += 1;
3297 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3298 }
3299 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3300 // be masked off without changing the result.
3301 MaskVal = -(CmpVal & -CmpVal);
3302 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3303 }
3304 if (!MaskVal)
3305 return;
3306
3307 // Check whether the combination of mask, comparison value and comparison
3308 // type are suitable.
3309 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3310 unsigned NewCCMask, ShiftVal;
3311 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3312 NewC.Op0.getOpcode() == ISD::SHL &&
3313 isSimpleShift(NewC.Op0, ShiftVal) &&
3314 (MaskVal >> ShiftVal != 0) &&
3315 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3316 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3317 MaskVal >> ShiftVal,
3318 CmpVal >> ShiftVal,
3319 SystemZICMP::Any))) {
3320 NewC.Op0 = NewC.Op0.getOperand(0);
3321 MaskVal >>= ShiftVal;
3322 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3323 NewC.Op0.getOpcode() == ISD::SRL &&
3324 isSimpleShift(NewC.Op0, ShiftVal) &&
3325 (MaskVal << ShiftVal != 0) &&
3326 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3327 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3328 MaskVal << ShiftVal,
3329 CmpVal << ShiftVal,
3331 NewC.Op0 = NewC.Op0.getOperand(0);
3332 MaskVal <<= ShiftVal;
3333 } else {
3334 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3335 NewC.ICmpType);
3336 if (!NewCCMask)
3337 return;
3338 }
3339
3340 // Go ahead and make the change.
3341 C.Opcode = SystemZISD::TM;
3342 C.Op0 = NewC.Op0;
3343 if (Mask && Mask->getZExtValue() == MaskVal)
3344 C.Op1 = SDValue(Mask, 0);
3345 else
3346 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3347 C.CCValid = SystemZ::CCMASK_TM;
3348 C.CCMask = NewCCMask;
3349}
3350
3351// Implement i128 comparison in vector registers.
3352static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3353 Comparison &C) {
3354 if (C.Opcode != SystemZISD::ICMP)
3355 return;
3356 if (C.Op0.getValueType() != MVT::i128)
3357 return;
3358
3359 // Recognize vector comparison reductions.
3360 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3361 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3362 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3363 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3364 bool CmpNull = isNullConstant(C.Op1);
3365 SDValue Src = peekThroughBitcasts(C.Op0);
3366 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3367 Src = Src.getOperand(0);
3368 CmpNull = !CmpNull;
3369 }
3370 unsigned Opcode = 0;
3371 if (Src.hasOneUse()) {
3372 switch (Src.getOpcode()) {
3373 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3374 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3375 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3376 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3377 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3378 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3379 default: break;
3380 }
3381 }
3382 if (Opcode) {
3383 C.Opcode = Opcode;
3384 C.Op0 = Src->getOperand(0);
3385 C.Op1 = Src->getOperand(1);
3386 C.CCValid = SystemZ::CCMASK_VCMP;
3388 if (!CmpEq)
3389 C.CCMask ^= C.CCValid;
3390 return;
3391 }
3392 }
3393
3394 // Everything below here is not useful if we have native i128 compares.
3395 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3396 return;
3397
3398 // (In-)Equality comparisons can be implemented via VCEQGS.
3399 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3400 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3401 C.Opcode = SystemZISD::VICMPES;
3402 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3403 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3404 C.CCValid = SystemZ::CCMASK_VCMP;
3405 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3406 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3407 else
3408 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3409 return;
3410 }
3411
3412 // Normalize other comparisons to GT.
3413 bool Swap = false, Invert = false;
3414 switch (C.CCMask) {
3415 case SystemZ::CCMASK_CMP_GT: break;
3416 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3417 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3418 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3419 default: llvm_unreachable("Invalid integer condition!");
3420 }
3421 if (Swap)
3422 std::swap(C.Op0, C.Op1);
3423
3424 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3425 C.Opcode = SystemZISD::UCMP128HI;
3426 else
3427 C.Opcode = SystemZISD::SCMP128HI;
3428 C.CCValid = SystemZ::CCMASK_ANY;
3429 C.CCMask = SystemZ::CCMASK_1;
3430
3431 if (Invert)
3432 C.CCMask ^= C.CCValid;
3433}
3434
3435// See whether the comparison argument contains a redundant AND
3436// and remove it if so. This sometimes happens due to the generic
3437// BRCOND expansion.
3438 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3439 Comparison &C) {
3440 if (C.Op0.getOpcode() != ISD::AND)
3441 return;
3442 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3443 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3444 return;
3445 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3446 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3447 return;
3448
3449 C.Op0 = C.Op0.getOperand(0);
3450}
3451
3452// Return a Comparison that tests the condition-code result of intrinsic
3453// node Call against constant integer CC using comparison code Cond.
3454// Opcode is the opcode of the SystemZISD operation for the intrinsic
3455// and CCValid is the set of possible condition-code results.
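// For example, testing an intrinsic's CC result with SETEQ against CC == 1
// yields CCMask = 1 << (3 - 1), i.e. only the bit for condition code 1;
// SETULT against CC == 2 selects the bits for condition codes 0 and 1.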
3456static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3457 SDValue Call, unsigned CCValid, uint64_t CC,
3458 ISD::CondCode Cond) {
3459 Comparison C(Call, SDValue(), SDValue());
3460 C.Opcode = Opcode;
3461 C.CCValid = CCValid;
3462 if (Cond == ISD::SETEQ)
3463 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3464 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3465 else if (Cond == ISD::SETNE)
3466 // ...and the inverse of that.
3467 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3468 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3469 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3470 // always true for CC>3.
3471 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3472 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3473 // ...and the inverse of that.
3474 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3475 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3476 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3477 // always true for CC>3.
3478 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3479 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3480 // ...and the inverse of that.
3481 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3482 else
3483 llvm_unreachable("Unexpected integer comparison type");
3484 C.CCMask &= CCValid;
3485 return C;
3486}
3487
3488 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3489static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3490 ISD::CondCode Cond, const SDLoc &DL,
3491 SDValue Chain = SDValue(),
3492 bool IsSignaling = false) {
3493 if (CmpOp1.getOpcode() == ISD::Constant) {
3494 assert(!Chain);
3495 unsigned Opcode, CCValid;
3496 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3497 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3498 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3499 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3500 CmpOp1->getAsZExtVal(), Cond);
3501 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3502 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3503 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3504 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3505 CmpOp1->getAsZExtVal(), Cond);
3506 }
3507 Comparison C(CmpOp0, CmpOp1, Chain);
3508 C.CCMask = CCMaskForCondCode(Cond);
3509 if (C.Op0.getValueType().isFloatingPoint()) {
3510 C.CCValid = SystemZ::CCMASK_FCMP;
3511 if (!C.Chain)
3512 C.Opcode = SystemZISD::FCMP;
3513 else if (!IsSignaling)
3514 C.Opcode = SystemZISD::STRICT_FCMP;
3515 else
3516 C.Opcode = SystemZISD::STRICT_FCMPS;
3517 adjustForFNeg(C);
3518 } else {
3519 assert(!C.Chain);
3520 C.CCValid = SystemZ::CCMASK_ICMP;
3521 C.Opcode = SystemZISD::ICMP;
3522 // Choose the type of comparison. Equality and inequality tests can
3523 // use either signed or unsigned comparisons. The choice also doesn't
3524 // matter if both sign bits are known to be clear. In those cases we
3525 // want to give the main isel code the freedom to choose whichever
3526 // form fits best.
3527 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3528 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3529 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3530 C.ICmpType = SystemZICMP::Any;
3531 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3532 C.ICmpType = SystemZICMP::UnsignedOnly;
3533 else
3534 C.ICmpType = SystemZICMP::SignedOnly;
3535 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3536 adjustForRedundantAnd(DAG, DL, C);
3537 adjustZeroCmp(DAG, DL, C);
3538 adjustSubwordCmp(DAG, DL, C);
3539 adjustForSubtraction(DAG, DL, C);
3540 adjustForLTGFR(C);
3541 adjustICmpTruncate(DAG, DL, C);
3542 }
3543
3544 if (shouldSwapCmpOperands(C)) {
3545 std::swap(C.Op0, C.Op1);
3546 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3547 }
3548
3549 adjustForTestUnderMask(DAG, DL, C);
3550 adjustICmp128(DAG, DL, C);
3551 return C;
3552}
3553
3554// Emit the comparison instruction described by C.
3555static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3556 if (!C.Op1.getNode()) {
3557 SDNode *Node;
3558 switch (C.Op0.getOpcode()) {
3559 case ISD::INTRINSIC_W_CHAIN:
3560 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3561 return SDValue(Node, 0);
3562 case ISD::INTRINSIC_WO_CHAIN:
3563 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3564 return SDValue(Node, Node->getNumValues() - 1);
3565 default:
3566 llvm_unreachable("Invalid comparison operands");
3567 }
3568 }
3569 if (C.Opcode == SystemZISD::ICMP)
3570 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3571 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3572 if (C.Opcode == SystemZISD::TM) {
3573 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3574 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3575 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3576 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3577 }
3578 if (C.Opcode == SystemZISD::VICMPES ||
3579 C.Opcode == SystemZISD::VICMPHS ||
3580 C.Opcode == SystemZISD::VICMPHLS ||
3581 C.Opcode == SystemZISD::VFCMPES ||
3582 C.Opcode == SystemZISD::VFCMPHS ||
3583 C.Opcode == SystemZISD::VFCMPHES) {
3584 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3585 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3586 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3587 return SDValue(Val.getNode(), 1);
3588 }
3589 if (C.Chain) {
3590 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3591 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3592 }
3593 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3594}
3595
3596// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3597// 64 bits. Extend is the extension type to use. Store the high part
3598// in Hi and the low part in Lo.
3599static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3600 SDValue Op0, SDValue Op1, SDValue &Hi,
3601 SDValue &Lo) {
3602 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3603 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3604 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3605 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3606 DAG.getConstant(32, DL, MVT::i64));
3607 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3608 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3609}
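// Editorial note (illustrative): e.g. for unsigned i32 operands 0x80000000
// and 4, the widened multiply yields 0x2'00000000, so Hi = 2 and Lo = 0; the
// signed variant differs only in using SIGN_EXTEND for the widening.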
3610
3611// Lower a binary operation that produces two VT results, one in each
3612// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3613// and Opcode performs the GR128 operation. Store the even register result
3614// in Even and the odd register result in Odd.
3615static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3616 unsigned Opcode, SDValue Op0, SDValue Op1,
3617 SDValue &Even, SDValue &Odd) {
3618 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3619 bool Is32Bit = is32Bit(VT);
3620 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3621 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3622}
3623
3624// Return an i32 value that is 1 if the CC value produced by CCReg is
3625// in the mask CCMask and 0 otherwise. CC is known to have a value
3626// in CCValid, so other values can be ignored.
3627static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3628 unsigned CCValid, unsigned CCMask) {
3629 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3630 DAG.getConstant(0, DL, MVT::i32),
3631 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3632 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3633 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3634}
3635
3636// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3637// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3638// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3639// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3640// floating-point comparisons.
3641enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3642static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3643 switch (CC) {
3644 case ISD::SETOEQ:
3645 case ISD::SETEQ:
3646 switch (Mode) {
3647 case CmpMode::Int: return SystemZISD::VICMPE;
3648 case CmpMode::FP: return SystemZISD::VFCMPE;
3649 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3650 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3651 }
3652 llvm_unreachable("Bad mode");
3653
3654 case ISD::SETOGE:
3655 case ISD::SETGE:
3656 switch (Mode) {
3657 case CmpMode::Int: return 0;
3658 case CmpMode::FP: return SystemZISD::VFCMPHE;
3659 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3660 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3661 }
3662 llvm_unreachable("Bad mode");
3663
3664 case ISD::SETOGT:
3665 case ISD::SETGT:
3666 switch (Mode) {
3667 case CmpMode::Int: return SystemZISD::VICMPH;
3668 case CmpMode::FP: return SystemZISD::VFCMPH;
3669 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3670 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3671 }
3672 llvm_unreachable("Bad mode");
3673
3674 case ISD::SETUGT:
3675 switch (Mode) {
3676 case CmpMode::Int: return SystemZISD::VICMPHL;
3677 case CmpMode::FP: return 0;
3678 case CmpMode::StrictFP: return 0;
3679 case CmpMode::SignalingFP: return 0;
3680 }
3681 llvm_unreachable("Bad mode");
3682
3683 default:
3684 return 0;
3685 }
3686}
3687
3688// Return the SystemZISD vector comparison operation for CC or its inverse,
3689// or 0 if neither can be done directly. Indicate in Invert whether the
3690// result is for the inverse of CC. Mode is as above.
3691static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3692 bool &Invert) {
3693 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3694 Invert = false;
3695 return Opcode;
3696 }
3697
3698 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3699 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3700 Invert = true;
3701 return Opcode;
3702 }
3703
3704 return 0;
3705}
3706
3707// Return a v2f64 that contains the extended form of elements Start and Start+1
3708// of v4f32 value Op. If Chain is nonnull, return the strict form.
3709static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3710 SDValue Op, SDValue Chain) {
3711 int Mask[] = { Start, -1, Start + 1, -1 };
3712 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3713 if (Chain) {
3714 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3715 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3716 }
3717 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3718}
3719
3720// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3721// producing a result of type VT. If Chain is nonnull, return the strict form.
3722SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3723 const SDLoc &DL, EVT VT,
3724 SDValue CmpOp0,
3725 SDValue CmpOp1,
3726 SDValue Chain) const {
3727 // There is no hardware support for v4f32 (unless we have the vector
3728 // enhancements facility 1), so extend the vector into two v2f64s
3729 // and compare those.
3730 if (CmpOp0.getValueType() == MVT::v4f32 &&
3731 !Subtarget.hasVectorEnhancements1()) {
3732 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3733 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3734 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3735 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3736 if (Chain) {
3737 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3738 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3739 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3740 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3741 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3742 H1.getValue(1), L1.getValue(1),
3743 HRes.getValue(1), LRes.getValue(1) };
3744 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3745 SDValue Ops[2] = { Res, NewChain };
3746 return DAG.getMergeValues(Ops, DL);
3747 }
3748 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3749 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3750 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3751 }
3752 if (Chain) {
3753 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3754 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3755 }
3756 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3757}
3758
3759// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3760// an integer mask of type VT. If Chain is nonnull, we have a strict
3761// floating-point comparison. If in addition IsSignaling is true, we have
3762// a strict signaling floating-point comparison.
3763SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3764 const SDLoc &DL, EVT VT,
3765 ISD::CondCode CC,
3766 SDValue CmpOp0,
3767 SDValue CmpOp1,
3768 SDValue Chain,
3769 bool IsSignaling) const {
3770 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3771 assert (!Chain || IsFP);
3772 assert (!IsSignaling || Chain);
3773 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3774 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3775 bool Invert = false;
3776 SDValue Cmp;
3777 switch (CC) {
3778 // Handle tests for order using (or (ogt y x) (oge x y)).
3779 case ISD::SETUO:
3780 Invert = true;
3781 [[fallthrough]];
3782 case ISD::SETO: {
3783 assert(IsFP && "Unexpected integer comparison");
3784 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3785 DL, VT, CmpOp1, CmpOp0, Chain);
3786 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3787 DL, VT, CmpOp0, CmpOp1, Chain);
3788 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3789 if (Chain)
3790 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3791 LT.getValue(1), GE.getValue(1));
3792 break;
3793 }
3794
3795 // Handle <> tests using (or (ogt y x) (ogt x y)).
3796 case ISD::SETUEQ:
3797 Invert = true;
3798 [[fallthrough]];
3799 case ISD::SETONE: {
3800 assert(IsFP && "Unexpected integer comparison");
3801 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3802 DL, VT, CmpOp1, CmpOp0, Chain);
3803 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3804 DL, VT, CmpOp0, CmpOp1, Chain);
3805 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3806 if (Chain)
3807 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3808 LT.getValue(1), GT.getValue(1));
3809 break;
3810 }
3811
3812 // Otherwise a single comparison is enough. It doesn't really
3813 // matter whether we try the inversion or the swap first, since
3814 // there are no cases where both work.
3815 default:
3816 // Optimize sign-bit comparisons to signed compares.
3817 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3818 ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
3819 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3820 APInt Mask;
3821 if (CmpOp0.getOpcode() == ISD::AND
3822 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3823 && Mask == APInt::getSignMask(EltSize)) {
3824 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3825 CmpOp0 = CmpOp0.getOperand(0);
3826 }
3827 }
3828 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3829 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3830 else {
3831 CC = ISD::getSetCCSwappedOperands(CC);
3832 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3833 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3834 else
3835 llvm_unreachable("Unhandled comparison");
3836 }
3837 if (Chain)
3838 Chain = Cmp.getValue(1);
3839 break;
3840 }
3841 if (Invert) {
3842 SDValue Mask =
3843 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3844 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3845 }
3846 if (Chain && Chain.getNode() != Cmp.getNode()) {
3847 SDValue Ops[2] = { Cmp, Chain };
3848 Cmp = DAG.getMergeValues(Ops, DL);
3849 }
3850 return Cmp;
3851}
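// Editorial note (illustrative): SETO is computed as (ogt y x) | (oge x y),
// which is true exactly when neither element is a NaN, and SETONE as
// (ogt y x) | (ogt x y); SETUO and SETUEQ are their inverses and are obtained
// by XOR-ing the result with an all-ones splat.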
3852
3853SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3854 SelectionDAG &DAG) const {
3855 SDValue CmpOp0 = Op.getOperand(0);
3856 SDValue CmpOp1 = Op.getOperand(1);
3857 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3858 SDLoc DL(Op);
3859 EVT VT = Op.getValueType();
3860 if (VT.isVector())
3861 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3862
3863 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3864 SDValue CCReg = emitCmp(DAG, DL, C);
3865 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3866}
3867
3868SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3869 SelectionDAG &DAG,
3870 bool IsSignaling) const {
3871 SDValue Chain = Op.getOperand(0);
3872 SDValue CmpOp0 = Op.getOperand(1);
3873 SDValue CmpOp1 = Op.getOperand(2);
3874 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3875 SDLoc DL(Op);
3876 EVT VT = Op.getNode()->getValueType(0);
3877 if (VT.isVector()) {
3878 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3879 Chain, IsSignaling);
3880 return Res.getValue(Op.getResNo());
3881 }
3882
3883 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3884 SDValue CCReg = emitCmp(DAG, DL, C);
3885 CCReg->setFlags(Op->getFlags());
3886 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3887 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3888 return DAG.getMergeValues(Ops, DL);
3889}
3890
3891SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3892 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3893 SDValue CmpOp0 = Op.getOperand(2);
3894 SDValue CmpOp1 = Op.getOperand(3);
3895 SDValue Dest = Op.getOperand(4);
3896 SDLoc DL(Op);
3897
3898 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3899 SDValue CCReg = emitCmp(DAG, DL, C);
3900 return DAG.getNode(
3901 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3902 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3903 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3904}
3905
3906// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3907// allowing Pos and Neg to be wider than CmpOp.
3908static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3909 return (Neg.getOpcode() == ISD::SUB &&
3910 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3911 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3912 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3913 Pos.getOperand(0) == CmpOp)));
3914}
3915
3916// Return the absolute or negative absolute of Op; IsNegative decides which.
3917static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3918 bool IsNegative) {
3919 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3920 if (IsNegative)
3921 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3922 DAG.getConstant(0, DL, Op.getValueType()), Op);
3923 return Op;
3924}
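// Editorial note (illustrative): for a selection such as
// "select_cc x, 0, x, (sub 0, x), setlt", isAbsolute matches the operands and
// getAbsolute emits (sub 0, (abs x)), i.e. a negated absolute value; the
// sign-extended form of the pattern is what allows LPGFR/LNGFR selection.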
3925
3926static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3927 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3928 EVT VT = MVT::i128;
3929 unsigned Op;
3930
3931 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3932 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3933 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3934 std::swap(TrueOp, FalseOp);
3935 C.CCMask ^= C.CCValid;
3936 }
3937 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3938 std::swap(C.Op0, C.Op1);
3939 C.CCMask = SystemZ::CCMASK_CMP_GT;
3940 }
3941 switch (C.CCMask) {
3942 case SystemZ::CCMASK_CMP_EQ:
3943 Op = SystemZISD::VICMPE;
3944 break;
3945 case SystemZ::CCMASK_CMP_GT:
3946 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3947 Op = SystemZISD::VICMPHL;
3948 else
3949 Op = SystemZISD::VICMPH;
3950 break;
3951 default:
3952 llvm_unreachable("Unhandled comparison");
3953 break;
3954 }
3955
3956 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3957 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3958 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3959 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3960}
3961
3962SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3963 SelectionDAG &DAG) const {
3964 SDValue CmpOp0 = Op.getOperand(0);
3965 SDValue CmpOp1 = Op.getOperand(1);
3966 SDValue TrueOp = Op.getOperand(2);
3967 SDValue FalseOp = Op.getOperand(3);
3968 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3969 SDLoc DL(Op);
3970
3971 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
3972 // legalizer, as it will be handled according to the type of the resulting
3973 // value. Extend them here if needed.
3974 if (CmpOp0.getSimpleValueType() == MVT::f16) {
3975 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
3976 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
3977 }
3978
3979 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3980
3981 // Check for absolute and negative-absolute selections, including those
3982 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3983 // This check supplements the one in DAGCombiner.
3984 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3985 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3986 C.Op1.getOpcode() == ISD::Constant &&
3987 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3988 C.Op1->getAsZExtVal() == 0) {
3989 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3990 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3991 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3992 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3993 }
3994
3995 if (Subtarget.hasVectorEnhancements3() &&
3996 C.Opcode == SystemZISD::ICMP &&
3997 C.Op0.getValueType() == MVT::i128 &&
3998 TrueOp.getValueType() == MVT::i128) {
3999 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4000 }
4001
4002 SDValue CCReg = emitCmp(DAG, DL, C);
4003 SDValue Ops[] = {TrueOp, FalseOp,
4004 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4005 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4006
4007 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4008}
4009
4010SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4011 SelectionDAG &DAG) const {
4012 SDLoc DL(Node);
4013 const GlobalValue *GV = Node->getGlobal();
4014 int64_t Offset = Node->getOffset();
4015 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4016 CodeModel::Model CM = DAG.getTarget().getCodeModel();
4017
4018 SDValue Result;
4019 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4020 if (isInt<32>(Offset)) {
4021 // Assign anchors at 1<<12 byte boundaries.
4022 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4023 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4024 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4025
4026 // The offset can be folded into the address if it is aligned to a
4027 // halfword.
4028 Offset -= Anchor;
4029 if (Offset != 0 && (Offset & 1) == 0) {
4030 SDValue Full =
4031 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4032 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
4033 Offset = 0;
4034 }
4035 } else {
4036 // Conservatively load a constant offset greater than 32 bits into a
4037 // register below.
4038 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4039 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4040 }
4041 } else if (Subtarget.isTargetELF()) {
4042 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4043 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4044 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4046 } else if (Subtarget.isTargetzOS()) {
4047 Result = getADAEntry(DAG, GV, DL, PtrVT);
4048 } else
4049 llvm_unreachable("Unexpected Subtarget");
4050
4051 // If there was a non-zero offset that we didn't fold, create an explicit
4052 // addition for it.
4053 if (Offset != 0)
4054 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4055 DAG.getSignedConstant(Offset, DL, PtrVT));
4056
4057 return Result;
4058}
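// Editorial worked example (illustrative): for a global accessed at offset
// 0x12346, the anchor is 0x12000 and the remaining 0x346 is even, so it is
// folded into a PCREL_OFFSET node; an odd remainder, or an offset outside the
// signed 32-bit range, instead ends up in the explicit ADD above.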
4059
4060SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4061 SelectionDAG &DAG,
4062 unsigned Opcode,
4063 SDValue GOTOffset) const {
4064 SDLoc DL(Node);
4065 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4066 SDValue Chain = DAG.getEntryNode();
4067 SDValue Glue;
4068
4069 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4070 CallingConv::GHC)
4071 report_fatal_error("In GHC calling convention TLS is not supported");
4072
4073 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4074 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4075 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4076 Glue = Chain.getValue(1);
4077 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4078 Glue = Chain.getValue(1);
4079
4080 // The first call operand is the chain and the second is the TLS symbol.
4081 SmallVector<SDValue, 8> Ops;
4082 Ops.push_back(Chain);
4083 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4084 Node->getValueType(0),
4085 0, 0));
4086
4087 // Add argument registers to the end of the list so that they are
4088 // known live into the call.
4089 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4090 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4091
4092 // Add a register mask operand representing the call-preserved registers.
4093 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4094 const uint32_t *Mask =
4095 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4096 assert(Mask && "Missing call preserved mask for calling convention");
4097 Ops.push_back(DAG.getRegisterMask(Mask));
4098
4099 // Glue the call to the argument copies.
4100 Ops.push_back(Glue);
4101
4102 // Emit the call.
4103 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4104 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4105 Glue = Chain.getValue(1);
4106
4107 // Copy the return value from %r2.
4108 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4109}
4110
4111SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4112 SelectionDAG &DAG) const {
4113 SDValue Chain = DAG.getEntryNode();
4114 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4115
4116 // The high part of the thread pointer is in access register 0.
4117 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4118 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4119
4120 // The low part of the thread pointer is in access register 1.
4121 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4122 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4123
4124 // Merge them into a single 64-bit address.
4125 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4126 DAG.getConstant(32, DL, PtrVT));
4127 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4128}
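// Editorial note (illustrative): with access registers a0 = 0x00000001 and
// a1 = 0x23456789 the result is (0x1 << 32) | 0x23456789 =
// 0x0000000123456789; zero-extending the low half keeps the OR from touching
// the high word.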
4129
4130SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4131 SelectionDAG &DAG) const {
4132 if (DAG.getTarget().useEmulatedTLS())
4133 return LowerToTLSEmulatedModel(Node, DAG);
4134 SDLoc DL(Node);
4135 const GlobalValue *GV = Node->getGlobal();
4136 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4137 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4138
4139 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4140 CallingConv::GHC)
4141 report_fatal_error("In GHC calling convention TLS is not supported");
4142
4143 SDValue TP = lowerThreadPointer(DL, DAG);
4144
4145 // Get the offset of GA from the thread pointer, based on the TLS model.
4146 SDValue Offset;
4147 switch (model) {
4148 case TLSModel::GeneralDynamic: {
4149 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4150 SystemZConstantPoolValue *CPV =
4151 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
4152
4152
4153 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4154 Offset = DAG.getLoad(
4155 PtrVT, DL, DAG.getEntryNode(), Offset,
4156 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4157
4158 // Call __tls_get_offset to retrieve the offset.
4159 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4160 break;
4161 }
4162
4163 case TLSModel::LocalDynamic: {
4164 // Load the GOT offset of the module ID.
4165 SystemZConstantPoolValue *CPV =
4166 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
4167
4167
4168 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4169 Offset = DAG.getLoad(
4170 PtrVT, DL, DAG.getEntryNode(), Offset,
4171 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4172
4173 // Call __tls_get_offset to retrieve the module base offset.
4174 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4175
4176 // Note: The SystemZLDCleanupPass will remove redundant computations
4177 // of the module base offset. Count total number of local-dynamic
4178 // accesses to trigger execution of that pass.
4179 SystemZMachineFunctionInfo* MFI =
4180 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4181 MFI->incNumLocalDynamicTLSAccesses();
4182
4183 // Add the per-symbol offset.
4184 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4185
4186 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4187 DTPOffset = DAG.getLoad(
4188 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4189 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4190
4191 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4192 break;
4193 }
4194
4195 case TLSModel::InitialExec: {
4196 // Load the offset from the GOT.
4197 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4198 SystemZII::MO_INDNTPOFF);
4199 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4200 Offset =
4201 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4202 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4203 break;
4204 }
4205
4206 case TLSModel::LocalExec: {
4207 // Force the offset into the constant pool and load it from there.
4208 SystemZConstantPoolValue *CPV =
4209 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4210
4210
4211 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4212 Offset = DAG.getLoad(
4213 PtrVT, DL, DAG.getEntryNode(), Offset,
4214 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4215 break;
4216 }
4217 }
4218
4219 // Add the base and offset together.
4220 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4221}
4222
4223SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4224 SelectionDAG &DAG) const {
4225 SDLoc DL(Node);
4226 const BlockAddress *BA = Node->getBlockAddress();
4227 int64_t Offset = Node->getOffset();
4228 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4229
4230 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4231 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4232 return Result;
4233}
4234
4235SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4236 SelectionDAG &DAG) const {
4237 SDLoc DL(JT);
4238 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4239 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4240
4241 // Use LARL to load the address of the table.
4242 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4243}
4244
4245SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4246 SelectionDAG &DAG) const {
4247 SDLoc DL(CP);
4248 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4249
4250 SDValue Result;
4251 if (CP->isMachineConstantPoolEntry())
4252 Result =
4253 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4254 else
4255 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4256 CP->getOffset());
4257
4258 // Use LARL to load the address of the constant pool entry.
4259 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4260}
4261
4262SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4263 SelectionDAG &DAG) const {
4264 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4265 MachineFunction &MF = DAG.getMachineFunction();
4266 MachineFrameInfo &MFI = MF.getFrameInfo();
4267 MFI.setFrameAddressIsTaken(true);
4268
4269 SDLoc DL(Op);
4270 unsigned Depth = Op.getConstantOperandVal(0);
4271 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4272
4273 // By definition, the frame address is the address of the back chain. (In
4274 // the case of packed stack without backchain, return the address where the
4275 // backchain would have been stored. This will either be an unused space or
4276 // contain a saved register).
4277 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4278 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4279
4280 if (Depth > 0) {
4281 // FIXME The frontend should detect this case.
4282 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4283 report_fatal_error("Unsupported stack frame traversal count");
4284
4285 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4286 while (Depth--) {
4287 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4288 MachinePointerInfo());
4289 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4290 }
4291 }
4292
4293 return BackChain;
4294}
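// Editorial note (illustrative): for __builtin_frame_address(2) the loop above
// performs two dependent loads, each following the saved back chain and then
// re-adding the backchain slot offset to form the next frame's slot address.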
4295
4296SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4297 SelectionDAG &DAG) const {
4298 MachineFunction &MF = DAG.getMachineFunction();
4299 MachineFrameInfo &MFI = MF.getFrameInfo();
4300 MFI.setReturnAddressIsTaken(true);
4301
4302 SDLoc DL(Op);
4303 unsigned Depth = Op.getConstantOperandVal(0);
4304 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4305
4306 if (Depth > 0) {
4307 // FIXME The frontend should detect this case.
4308 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4309 report_fatal_error("Unsupported stack frame traversal count");
4310
4311 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4312 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4313 int Offset = TFL->getReturnAddressOffset(MF);
4314 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4315 DAG.getSignedConstant(Offset, DL, PtrVT));
4316 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4317 MachinePointerInfo());
4318 }
4319
4320 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4321 // implicit live-in.
4322 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4323 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4324 &SystemZ::GR64BitRegClass);
4325 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4326}
4327
4328SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4329 SelectionDAG &DAG) const {
4330 SDLoc DL(Op);
4331 SDValue In = Op.getOperand(0);
4332 EVT InVT = In.getValueType();
4333 EVT ResVT = Op.getValueType();
4334
4335 // Convert loads directly. This is normally done by DAGCombiner,
4336 // but we need this case for bitcasts that are created during lowering
4337 // and which are then lowered themselves.
4338 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4339 if (ISD::isNormalLoad(LoadN)) {
4340 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4341 LoadN->getBasePtr(), LoadN->getMemOperand());
4342 // Update the chain uses.
4343 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4344 return NewLoad;
4345 }
4346
4347 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4348 SDValue In64;
4349 if (Subtarget.hasHighWord()) {
4350 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4351 MVT::i64);
4352 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4353 MVT::i64, SDValue(U64, 0), In);
4354 } else {
4355 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4356 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4357 DAG.getConstant(32, DL, MVT::i64));
4358 }
4359 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4360 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4361 DL, MVT::f32, Out64);
4362 }
4363 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4364 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4365 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4366 MVT::f64, SDValue(U64, 0), In);
4367 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4368 if (Subtarget.hasHighWord())
4369 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4370 MVT::i32, Out64);
4371 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4372 DAG.getConstant(32, DL, MVT::i64));
4373 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4374 }
4375 llvm_unreachable("Unexpected bitcast combination");
4376}
4377
4378SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4379 SelectionDAG &DAG) const {
4380
4381 if (Subtarget.isTargetXPLINK64())
4382 return lowerVASTART_XPLINK(Op, DAG);
4383 else
4384 return lowerVASTART_ELF(Op, DAG);
4385}
4386
4387SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4388 SelectionDAG &DAG) const {
4389 MachineFunction &MF = DAG.getMachineFunction();
4390 SystemZMachineFunctionInfo *FuncInfo =
4391 MF.getInfo<SystemZMachineFunctionInfo>();
4392
4393 SDLoc DL(Op);
4394
4395 // vastart just stores the address of the VarArgsFrameIndex slot into the
4396 // memory location argument.
4397 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4398 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4399 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4400 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4401 MachinePointerInfo(SV));
4402}
4403
4404SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4405 SelectionDAG &DAG) const {
4406 MachineFunction &MF = DAG.getMachineFunction();
4407 SystemZMachineFunctionInfo *FuncInfo =
4408 MF.getInfo<SystemZMachineFunctionInfo>();
4409 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4410
4411 SDValue Chain = Op.getOperand(0);
4412 SDValue Addr = Op.getOperand(1);
4413 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4414 SDLoc DL(Op);
4415
4416 // The initial values of each field.
4417 const unsigned NumFields = 4;
4418 SDValue Fields[NumFields] = {
4419 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4420 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4421 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4422 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4423 };
4424
4425 // Store each field into its respective slot.
4426 SDValue MemOps[NumFields];
4427 unsigned Offset = 0;
4428 for (unsigned I = 0; I < NumFields; ++I) {
4429 SDValue FieldAddr = Addr;
4430 if (Offset != 0)
4431 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4432 DAG.getIntPtrConstant(Offset, DL));
4433 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4434 MachinePointerInfo(SV, Offset));
4435 Offset += 8;
4436 }
4437 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4438}
4439
4440SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4441 SelectionDAG &DAG) const {
4442 SDValue Chain = Op.getOperand(0);
4443 SDValue DstPtr = Op.getOperand(1);
4444 SDValue SrcPtr = Op.getOperand(2);
4445 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4446 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4447 SDLoc DL(Op);
4448
4449 uint32_t Sz =
4450 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4451 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4452 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4453 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4454 MachinePointerInfo(SrcSV));
4455}
4456
4457SDValue
4458SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4459 SelectionDAG &DAG) const {
4460 if (Subtarget.isTargetXPLINK64())
4461 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4462 else
4463 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4464}
4465
4466SDValue
4467SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4468 SelectionDAG &DAG) const {
4469 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4470 MachineFunction &MF = DAG.getMachineFunction();
4471 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4472 SDValue Chain = Op.getOperand(0);
4473 SDValue Size = Op.getOperand(1);
4474 SDValue Align = Op.getOperand(2);
4475 SDLoc DL(Op);
4476
4477 // If user has set the no alignment function attribute, ignore
4478 // alloca alignments.
4479 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4480
4481 uint64_t StackAlign = TFI->getStackAlignment();
4482 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4483 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4484
4485 SDValue NeededSpace = Size;
4486
4487 // Add extra space for alignment if needed.
4488 EVT PtrVT = getPointerTy(MF.getDataLayout());
4489 if (ExtraAlignSpace)
4490 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4491 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4492
4493 bool IsSigned = false;
4494 bool DoesNotReturn = false;
4495 bool IsReturnValueUsed = false;
4496 EVT VT = Op.getValueType();
4497 SDValue AllocaCall =
4498 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4499 CallingConv::C, IsSigned, DL, DoesNotReturn,
4500 IsReturnValueUsed)
4501 .first;
4502
4503 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4504 // to end of call in order to ensure it isn't broken up from the call
4505 // sequence.
4506 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4507 Register SPReg = Regs.getStackPointerRegister();
4508 Chain = AllocaCall.getValue(1);
4509 SDValue Glue = AllocaCall.getValue(2);
4510 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4511 Chain = NewSPRegNode.getValue(1);
4512
4513 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4514 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4515 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4516
4517 // Dynamically realign if needed.
4518 if (ExtraAlignSpace) {
4519 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4520 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4521 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4522 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4523 }
4524
4525 SDValue Ops[2] = {Result, Chain};
4526 return DAG.getMergeValues(Ops, DL);
4527}
4528
4529SDValue
4530SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4531 SelectionDAG &DAG) const {
4532 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4533 MachineFunction &MF = DAG.getMachineFunction();
4534 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4535 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4536
4537 SDValue Chain = Op.getOperand(0);
4538 SDValue Size = Op.getOperand(1);
4539 SDValue Align = Op.getOperand(2);
4540 SDLoc DL(Op);
4541
4542 // If user has set the no alignment function attribute, ignore
4543 // alloca alignments.
4544 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4545
4546 uint64_t StackAlign = TFI->getStackAlignment();
4547 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4548 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4549
4550 Register SPReg = getStackPointerRegisterToSaveRestore();
4551 SDValue NeededSpace = Size;
4552
4553 // Get a reference to the stack pointer.
4554 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4555
4556 // If we need a backchain, save it now.
4557 SDValue Backchain;
4558 if (StoreBackchain)
4559 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4560 MachinePointerInfo());
4561
4562 // Add extra space for alignment if needed.
4563 if (ExtraAlignSpace)
4564 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4565 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4566
4567 // Get the new stack pointer value.
4568 SDValue NewSP;
4569 if (hasInlineStackProbe(MF)) {
4570 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4571 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4572 Chain = NewSP.getValue(1);
4573 }
4574 else {
4575 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4576 // Copy the new stack pointer back.
4577 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4578 }
4579
4580 // The allocated data lives above the 160 bytes allocated for the standard
4581 // frame, plus any outgoing stack arguments. We don't know how much that
4582 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4583 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4584 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4585
4586 // Dynamically realign if needed.
4587 if (RequiredAlign > StackAlign) {
4588 Result =
4589 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4590 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4591 Result =
4592 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4593 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4594 }
4595
4596 if (StoreBackchain)
4597 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4598 MachinePointerInfo());
4599
4600 SDValue Ops[2] = { Result, Chain };
4601 return DAG.getMergeValues(Ops, DL);
4602}
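// Editorial worked example (illustrative): with the default 8-byte stack
// alignment and a requested alloca alignment of 32, ExtraAlignSpace is 24;
// the allocation is grown by 24 bytes and the ADJDYNALLOC-adjusted address is
// then rounded up with "add 24; and ~31" to guarantee 32-byte alignment.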
4603
4604SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4605 SDValue Op, SelectionDAG &DAG) const {
4606 SDLoc DL(Op);
4607
4608 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4609}
4610
4611SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4612 SelectionDAG &DAG,
4613 unsigned Opcode) const {
4614 EVT VT = Op.getValueType();
4615 SDLoc DL(Op);
4616 SDValue Even, Odd;
4617
4618 // This custom expander is only used on z17 and later for 64-bit types.
4619 assert(!is32Bit(VT));
4620 assert(Subtarget.hasMiscellaneousExtensions2());
4621
4622 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4623 // the high result in the even register. Return the latter.
4624 lowerGR128Binary(DAG, DL, VT, Opcode,
4625 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4626 return Even;
4627}
4628
4629SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4630 SelectionDAG &DAG) const {
4631 EVT VT = Op.getValueType();
4632 SDLoc DL(Op);
4633 SDValue Ops[2];
4634 if (is32Bit(VT))
4635 // Just do a normal 64-bit multiplication and extract the results.
4636 // We define this so that it can be used for constant division.
4637 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4638 Op.getOperand(1), Ops[1], Ops[0]);
4639 else if (Subtarget.hasMiscellaneousExtensions2())
4640 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4641 // the high result in the even register. ISD::SMUL_LOHI is defined to
4642 // return the low half first, so the results are in reverse order.
4643 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4644 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4645 else {
4646 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4647 //
4648 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4649 //
4650 // but using the fact that the upper halves are either all zeros
4651 // or all ones:
4652 //
4653 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4654 //
4655 // and grouping the right terms together since they are quicker than the
4656 // multiplication:
4657 //
4658 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4659 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4660 SDValue LL = Op.getOperand(0);
4661 SDValue RL = Op.getOperand(1);
4662 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4663 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4664 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4665 // the high result in the even register. ISD::SMUL_LOHI is defined to
4666 // return the low half first, so the results are in reverse order.
4667 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4668 LL, RL, Ops[1], Ops[0]);
4669 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4670 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4671 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4672 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4673 }
4674 return DAG.getMergeValues(Ops, DL);
4675}
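// Editorial worked example (illustrative, using 4-bit "registers" for
// brevity): for ll = -2 (0xe) and rl = 3, UMUL_LOHI gives 0xe * 0x3 = 0x2a,
// i.e. a high part of 2.  With lh = ll >> 3 = 0xf and rh = 0, the correction
// (lh & rl) + (ll & rh) is 3, so the adjusted high part is 2 - 3 = -1 (0xf),
// and 0xfa is indeed the 8-bit two's-complement encoding of -6.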
4676
4677SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4678 SelectionDAG &DAG) const {
4679 EVT VT = Op.getValueType();
4680 SDLoc DL(Op);
4681 SDValue Ops[2];
4682 if (is32Bit(VT))
4683 // Just do a normal 64-bit multiplication and extract the results.
4684 // We define this so that it can be used for constant division.
4685 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4686 Op.getOperand(1), Ops[1], Ops[0]);
4687 else
4688 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4689 // the high result in the even register. ISD::UMUL_LOHI is defined to
4690 // return the low half first, so the results are in reverse order.
4691 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4692 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4693 return DAG.getMergeValues(Ops, DL);
4694}
4695
4696SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4697 SelectionDAG &DAG) const {
4698 SDValue Op0 = Op.getOperand(0);
4699 SDValue Op1 = Op.getOperand(1);
4700 EVT VT = Op.getValueType();
4701 SDLoc DL(Op);
4702
4703 // We use DSGF for 32-bit division. This means the first operand must
4704 // always be 64-bit, and the second operand should be 32-bit whenever
4705 // that is possible, to improve performance.
4706 if (is32Bit(VT))
4707 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4708 else if (DAG.ComputeNumSignBits(Op1) > 32)
4709 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4710
4711 // DSG(F) returns the remainder in the even register and the
4712 // quotient in the odd register.
4713 SDValue Ops[2];
4714 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4715 return DAG.getMergeValues(Ops, DL);
4716}
4717
4718SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4719 SelectionDAG &DAG) const {
4720 EVT VT = Op.getValueType();
4721 SDLoc DL(Op);
4722
4723 // DL(G) returns the remainder in the even register and the
4724 // quotient in the odd register.
4725 SDValue Ops[2];
4726 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4727 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4728 return DAG.getMergeValues(Ops, DL);
4729}
4730
4731SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4732 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4733
4734 // Get the known-zero masks for each operand.
4735 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4736 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4737 DAG.computeKnownBits(Ops[1])};
4738
4739 // See if the upper 32 bits of one operand and the lower 32 bits of the
4740 // other are known zero. They are the low and high operands respectively.
4741 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4742 Known[1].Zero.getZExtValue() };
4743 unsigned High, Low;
4744 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4745 High = 1, Low = 0;
4746 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4747 High = 0, Low = 1;
4748 else
4749 return Op;
4750
4751 SDValue LowOp = Ops[Low];
4752 SDValue HighOp = Ops[High];
4753
4754 // If the high part is a constant, we're better off using IILH.
4755 if (HighOp.getOpcode() == ISD::Constant)
4756 return Op;
4757
4758 // If the low part is a constant that is outside the range of LHI,
4759 // then we're better off using IILF.
4760 if (LowOp.getOpcode() == ISD::Constant) {
4761 int64_t Value = int32_t(LowOp->getAsZExtVal());
4762 if (!isInt<16>(Value))
4763 return Op;
4764 }
4765
4766 // Check whether the high part is an AND that doesn't change the
4767 // high 32 bits and just masks out low bits. We can skip it if so.
4768 if (HighOp.getOpcode() == ISD::AND &&
4769 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4770 SDValue HighOp0 = HighOp.getOperand(0);
4771 uint64_t Mask = HighOp.getConstantOperandVal(1);
4772 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4773 HighOp = HighOp0;
4774 }
4775
4776 // Take advantage of the fact that all GR32 operations only change the
4777 // low 32 bits by truncating Low to an i32 and inserting it directly
4778 // using a subreg. The interesting cases are those where the truncation
4779 // can be folded.
4780 SDLoc DL(Op);
4781 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4782 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4783 MVT::i64, HighOp, Low32);
4784}
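// Editorial note (illustrative): when one OR operand contributes only the
// high 32 bits and the other only the low 32 bits, the node above becomes a
// plain INSERT_SUBREG of the truncated low operand into subreg_l32 of the
// high operand; the early returns for constant halves leave those cases to
// the normal IILH/IILF patterns instead.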
4785
4786// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4787SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4788 SelectionDAG &DAG) const {
4789 SDNode *N = Op.getNode();
4790 SDValue LHS = N->getOperand(0);
4791 SDValue RHS = N->getOperand(1);
4792 SDLoc DL(N);
4793
4794 if (N->getValueType(0) == MVT::i128) {
4795 unsigned BaseOp = 0;
4796 unsigned FlagOp = 0;
4797 bool IsBorrow = false;
4798 switch (Op.getOpcode()) {
4799 default: llvm_unreachable("Unknown instruction!");
4800 case ISD::UADDO:
4801 BaseOp = ISD::ADD;
4802 FlagOp = SystemZISD::VACC;
4803 break;
4804 case ISD::USUBO:
4805 BaseOp = ISD::SUB;
4806 FlagOp = SystemZISD::VSCBI;
4807 IsBorrow = true;
4808 break;
4809 }
4810 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4811 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4812 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4813 DAG.getValueType(MVT::i1));
4814 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4815 if (IsBorrow)
4816 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4817 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4818 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4819 }
4820
4821 unsigned BaseOp = 0;
4822 unsigned CCValid = 0;
4823 unsigned CCMask = 0;
4824
4825 switch (Op.getOpcode()) {
4826 default: llvm_unreachable("Unknown instruction!");
4827 case ISD::SADDO:
4828 BaseOp = SystemZISD::SADDO;
4829 CCValid = SystemZ::CCMASK_ARITH;
4830 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4831 break;
4832 case ISD::SSUBO:
4833 BaseOp = SystemZISD::SSUBO;
4834 CCValid = SystemZ::CCMASK_ARITH;
4835 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4836 break;
4837 case ISD::UADDO:
4838 BaseOp = SystemZISD::UADDO;
4839 CCValid = SystemZ::CCMASK_LOGICAL;
4840 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4841 break;
4842 case ISD::USUBO:
4843 BaseOp = SystemZISD::USUBO;
4844 CCValid = SystemZ::CCMASK_LOGICAL;
4845 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4846 break;
4847 }
4848
4849 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4850 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4851
4852 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4853 if (N->getValueType(1) == MVT::i1)
4854 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4855
4856 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4857}
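// Editorial note (illustrative): for i128 the carry/borrow flag comes from
// VACC/VSCBI; because VSCBI reports 1 when *no* borrow occurs, the flag is
// XOR-ed with 1 so that the returned overflow bit follows the ISD::USUBO
// convention of being set when the subtraction wraps.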
4858
4859static bool isAddCarryChain(SDValue Carry) {
4860 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4861 Carry->getValueType(0) != MVT::i128)
4862 Carry = Carry.getOperand(2);
4863 return Carry.getOpcode() == ISD::UADDO &&
4864 Carry->getValueType(0) != MVT::i128;
4865}
4866
4867static bool isSubBorrowChain(SDValue Carry) {
4868 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4869 Carry->getValueType(0) != MVT::i128)
4870 Carry = Carry.getOperand(2);
4871 return Carry.getOpcode() == ISD::USUBO &&
4872 Carry->getValueType(0) != MVT::i128;
4873}
4874
4875// Lower UADDO_CARRY/USUBO_CARRY nodes.
4876SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4877 SelectionDAG &DAG) const {
4878
4879 SDNode *N = Op.getNode();
4880 MVT VT = N->getSimpleValueType(0);
4881
4882 // Let legalize expand this if it isn't a legal type yet.
4883 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4884 return SDValue();
4885
4886 SDValue LHS = N->getOperand(0);
4887 SDValue RHS = N->getOperand(1);
4888 SDValue Carry = Op.getOperand(2);
4889 SDLoc DL(N);
4890
4891 if (VT == MVT::i128) {
4892 unsigned BaseOp = 0;
4893 unsigned FlagOp = 0;
4894 bool IsBorrow = false;
4895 switch (Op.getOpcode()) {
4896 default: llvm_unreachable("Unknown instruction!");
4897 case ISD::UADDO_CARRY:
4898 BaseOp = SystemZISD::VAC;
4899 FlagOp = SystemZISD::VACCC;
4900 break;
4901 case ISD::USUBO_CARRY:
4902 BaseOp = SystemZISD::VSBI;
4903 FlagOp = SystemZISD::VSBCBI;
4904 IsBorrow = true;
4905 break;
4906 }
4907 if (IsBorrow)
4908 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4909 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4910 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4911 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4912 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4913 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4914 DAG.getValueType(MVT::i1));
4915 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4916 if (IsBorrow)
4917 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4918 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4919 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4920 }
4921
4922 unsigned BaseOp = 0;
4923 unsigned CCValid = 0;
4924 unsigned CCMask = 0;
4925
4926 switch (Op.getOpcode()) {
4927 default: llvm_unreachable("Unknown instruction!");
4928 case ISD::UADDO_CARRY:
4929 if (!isAddCarryChain(Carry))
4930 return SDValue();
4931
4932 BaseOp = SystemZISD::ADDCARRY;
4933 CCValid = SystemZ::CCMASK_LOGICAL;
4934 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4935 break;
4936 case ISD::USUBO_CARRY:
4937 if (!isSubBorrowChain(Carry))
4938 return SDValue();
4939
4940 BaseOp = SystemZISD::SUBCARRY;
4941 CCValid = SystemZ::CCMASK_LOGICAL;
4942 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4943 break;
4944 }
4945
4946 // Set the condition code from the carry flag.
4947 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4948 DAG.getConstant(CCValid, DL, MVT::i32),
4949 DAG.getConstant(CCMask, DL, MVT::i32));
4950
4951 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4952 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4953
4954 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4955 if (N->getValueType(1) == MVT::i1)
4956 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4957
4958 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4959}
4960
4961SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4962 SelectionDAG &DAG) const {
4963 EVT VT = Op.getValueType();
4964 SDLoc DL(Op);
4965 Op = Op.getOperand(0);
4966
4967 if (VT.getScalarSizeInBits() == 128) {
4968 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4969 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4970 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4971 DAG.getConstant(0, DL, MVT::i64));
4972 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4973 return Op;
4974 }
4975
4976 // Handle vector types via VPOPCT.
4977 if (VT.isVector()) {
4978 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4979 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4980 switch (VT.getScalarSizeInBits()) {
4981 case 8:
4982 break;
4983 case 16: {
4984 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4985 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4986 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4987 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4988 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4989 break;
4990 }
4991 case 32: {
4992 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4993 DAG.getConstant(0, DL, MVT::i32));
4994 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4995 break;
4996 }
4997 case 64: {
4998 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4999 DAG.getConstant(0, DL, MVT::i32));
5000 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
5001 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5002 break;
5003 }
5004 default:
5005 llvm_unreachable("Unexpected type");
5006 }
5007 return Op;
5008 }
5009
5010 // Get the known-zero mask for the operand.
5011 KnownBits Known = DAG.computeKnownBits(Op);
5012 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
5013 if (NumSignificantBits == 0)
5014 return DAG.getConstant(0, DL, VT);
5015
5016 // Skip known-zero high parts of the operand.
5017 int64_t OrigBitSize = VT.getSizeInBits();
5018 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5019 BitSize = std::min(BitSize, OrigBitSize);
5020
5021 // The POPCNT instruction counts the number of bits in each byte.
5022 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5023 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5024 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5025
5026 // Add up per-byte counts in a binary tree. All bits of Op at
5027 // position larger than BitSize remain zero throughout.
5028 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5029 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
5030 if (BitSize != OrigBitSize)
5031 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5032 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5033 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5034 }
5035
5036 // Extract overall result from high byte.
5037 if (BitSize > 8)
5038 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5039 DAG.getConstant(BitSize - 8, DL, VT));
5040
5041 return Op;
5042}
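// Editorial worked example (illustrative): for the i32 value 0x01020304 the
// POPCNT instruction leaves per-byte counts 0x01010201; after adding the
// value shifted left by 16 and then by 8, the accumulator is 0x05040301, and
// the final ">> 24" extracts the total population count 5 from the top byte.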
5043
5044SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5045 SelectionDAG &DAG) const {
5046 SDLoc DL(Op);
5047 AtomicOrdering FenceOrdering =
5048 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5049 SyncScope::ID FenceSSID =
5050 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5051
5052 // The only fence that needs an instruction is a sequentially-consistent
5053 // cross-thread fence.
5054 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5055 FenceSSID == SyncScope::System) {
5056 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5057 Op.getOperand(0)),
5058 0);
5059 }
5060
5061 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5062 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5063}
5064
5065SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5066 SelectionDAG &DAG) const {
5067 EVT RegVT = Op.getValueType();
5068 if (RegVT.getSizeInBits() == 128)
5069 return lowerATOMIC_LDST_I128(Op, DAG);
5070 return lowerLoadF16(Op, DAG);
5071}
5072
5073SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5074 SelectionDAG &DAG) const {
5075 auto *Node = cast<AtomicSDNode>(Op.getNode());
5076 if (Node->getMemoryVT().getSizeInBits() == 128)
5077 return lowerATOMIC_LDST_I128(Op, DAG);
5078 return lowerStoreF16(Op, DAG);
5079}
5080
5081SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5082 SelectionDAG &DAG) const {
5083 auto *Node = cast<AtomicSDNode>(Op.getNode());
5084 assert(
5085 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5086 "Only custom lowering i128 or f128.");
5087 // Use same code to handle both legal and non-legal i128 types.
5088  SmallVector<SDValue, 2> Results;
5089  LowerOperationWrapper(Node, Results, DAG);
5090 return DAG.getMergeValues(Results, SDLoc(Op));
5091}
5092
5093// Prepare for a Compare And Swap for a subword operation. This needs to be
5094// done in memory with 4 bytes at natural alignment.
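// Illustration (added, not in the original source): for a 2-byte field at
// address X+2, where X is 4-byte aligned, AlignedAddr is X and BitShift is
// (X+2)*8, which is 16 modulo 32, so rotating the containing word left by
// BitShift brings the field into bits 16-31 (the top bits of the GR32);
// NegBitShift rotates it back to its original position.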
5095static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5096                                   SDValue &AlignedAddr, SDValue &BitShift,
5097 SDValue &NegBitShift) {
5098 EVT PtrVT = Addr.getValueType();
5099 EVT WideVT = MVT::i32;
5100
5101 // Get the address of the containing word.
5102 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5103 DAG.getSignedConstant(-4, DL, PtrVT));
5104
5105 // Get the number of bits that the word must be rotated left in order
5106 // to bring the field to the top bits of a GR32.
5107 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5108 DAG.getConstant(3, DL, PtrVT));
5109 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5110
5111 // Get the complementing shift amount, for rotating a field in the top
5112 // bits back to its proper position.
5113 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5114 DAG.getConstant(0, DL, WideVT), BitShift);
5115
5116}
5117
5118// Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the first
5119// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5120SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5121 SelectionDAG &DAG,
5122 unsigned Opcode) const {
5123 auto *Node = cast<AtomicSDNode>(Op.getNode());
5124
5125 // 32-bit operations need no special handling.
5126 EVT NarrowVT = Node->getMemoryVT();
5127 EVT WideVT = MVT::i32;
5128 if (NarrowVT == WideVT)
5129 return Op;
5130
5131 int64_t BitSize = NarrowVT.getSizeInBits();
5132 SDValue ChainIn = Node->getChain();
5133 SDValue Addr = Node->getBasePtr();
5134 SDValue Src2 = Node->getVal();
5135 MachineMemOperand *MMO = Node->getMemOperand();
5136 SDLoc DL(Node);
5137
5138 // Convert atomic subtracts of constants into additions.
5139 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5140 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5141       Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5142       Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5143 Src2.getValueType());
5144 }
5145
5146 SDValue AlignedAddr, BitShift, NegBitShift;
5147 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5148
5149 // Extend the source operand to 32 bits and prepare it for the inner loop.
5150 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5151 // operations require the source to be shifted in advance. (This shift
5152 // can be folded if the source is constant.) For AND and NAND, the lower
5153 // bits must be set, while for other opcodes they should be left clear.
5154 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5155 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5156 DAG.getConstant(32 - BitSize, DL, WideVT));
5157 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5158       Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5159     Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5160 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
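  // Example (added for clarity, not in the original source): for an i8 AND
  // with BitSize == 8, Src2 is shifted into the top byte and then ORed with
  // 0x00ffffff, so the AND performed inside the compare-and-swap loop leaves
  // the other three bytes of the containing word unchanged.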
5161
5162 // Construct the ATOMIC_LOADW_* node.
5163 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5164 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5165 DAG.getConstant(BitSize, DL, WideVT) };
5166 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5167 NarrowVT, MMO);
5168
5169 // Rotate the result of the final CS so that the field is in the lower
5170 // bits of a GR32, then truncate it.
5171 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5172 DAG.getConstant(BitSize, DL, WideVT));
5173 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5174
5175 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5176 return DAG.getMergeValues(RetOps, DL);
5177}
5178
5179// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5180// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5181SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5182 SelectionDAG &DAG) const {
5183 auto *Node = cast<AtomicSDNode>(Op.getNode());
5184 EVT MemVT = Node->getMemoryVT();
5185 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5186 // A full-width operation: negate and use LAA(G).
5187 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5188 assert(Subtarget.hasInterlockedAccess1() &&
5189 "Should have been expanded by AtomicExpand pass.");
5190 SDValue Src2 = Node->getVal();
5191 SDLoc DL(Src2);
5192 SDValue NegSrc2 =
5193 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5194 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5195 Node->getChain(), Node->getBasePtr(), NegSrc2,
5196 Node->getMemOperand());
5197 }
5198
5199 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5200}
5201
5202// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5203SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5204 SelectionDAG &DAG) const {
5205 auto *Node = cast<AtomicSDNode>(Op.getNode());
5206 SDValue ChainIn = Node->getOperand(0);
5207 SDValue Addr = Node->getOperand(1);
5208 SDValue CmpVal = Node->getOperand(2);
5209 SDValue SwapVal = Node->getOperand(3);
5210 MachineMemOperand *MMO = Node->getMemOperand();
5211 SDLoc DL(Node);
5212
5213 if (Node->getMemoryVT() == MVT::i128) {
5214 // Use same code to handle both legal and non-legal i128 types.
5215     SmallVector<SDValue, 3> Results;
5216     LowerOperationWrapper(Node, Results, DAG);
5217 return DAG.getMergeValues(Results, DL);
5218 }
5219
5220 // We have native support for 32-bit and 64-bit compare and swap, but we
5221 // still need to expand extracting the "success" result from the CC.
5222 EVT NarrowVT = Node->getMemoryVT();
5223 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5224 if (NarrowVT == WideVT) {
5225 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5226 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5227     SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5228                                                DL, Tys, Ops, NarrowVT, MMO);
5229 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5230                                 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5231
5232 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5233 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5234 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5235 return SDValue();
5236 }
5237
5238 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5239 // via a fullword ATOMIC_CMP_SWAPW operation.
5240 int64_t BitSize = NarrowVT.getSizeInBits();
5241
5242 SDValue AlignedAddr, BitShift, NegBitShift;
5243 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5244
5245 // Construct the ATOMIC_CMP_SWAPW node.
5246 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5247 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5248 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5249   SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5250                                              VTList, Ops, NarrowVT, MMO);
5251 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5252                               SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
5253
5254 // emitAtomicCmpSwapW() will zero extend the result (original value).
5255 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5256 DAG.getValueType(NarrowVT));
5257 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5258 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5259 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5260 return SDValue();
5261}
5262
5263MachineMemOperand::Flags
5264SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5265 // Because of how we convert atomic_load and atomic_store to normal loads and
5266 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5267 // since DAGCombine hasn't been updated to account for atomic, but
5268 // non-volatile, loads. (See D57601)
5269 if (auto *SI = dyn_cast<StoreInst>(&I))
5270     if (SI->isAtomic())
5271       return MachineMemOperand::MOVolatile;
5272   if (auto *LI = dyn_cast<LoadInst>(&I))
5273     if (LI->isAtomic())
5274       return MachineMemOperand::MOVolatile;
5275   if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5276     if (AI->isAtomic())
5277       return MachineMemOperand::MOVolatile;
5278   if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5279     if (AI->isAtomic())
5280       return MachineMemOperand::MOVolatile;
5281   return MachineMemOperand::MONone;
5282}
5283
5284SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5285 SelectionDAG &DAG) const {
5286 MachineFunction &MF = DAG.getMachineFunction();
5287 auto *Regs = Subtarget.getSpecialRegisters();
5288   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5289     report_fatal_error("Variable-sized stack allocations are not supported "
5290 "in GHC calling convention");
5291 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5292 Regs->getStackPointerRegister(), Op.getValueType());
5293}
5294
5295SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5296 SelectionDAG &DAG) const {
5297 MachineFunction &MF = DAG.getMachineFunction();
5298 auto *Regs = Subtarget.getSpecialRegisters();
5299 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5300
5301   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5302     report_fatal_error("Variable-sized stack allocations are not supported "
5303 "in GHC calling convention");
5304
5305 SDValue Chain = Op.getOperand(0);
5306 SDValue NewSP = Op.getOperand(1);
5307 SDValue Backchain;
5308 SDLoc DL(Op);
5309
5310 if (StoreBackchain) {
5311 SDValue OldSP = DAG.getCopyFromReg(
5312 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5313 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5314 MachinePointerInfo());
5315 }
5316
5317 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5318
5319 if (StoreBackchain)
5320 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5321 MachinePointerInfo());
5322
5323 return Chain;
5324}
5325
5326SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5327 SelectionDAG &DAG) const {
5328 bool IsData = Op.getConstantOperandVal(4);
5329 if (!IsData)
5330 // Just preserve the chain.
5331 return Op.getOperand(0);
5332
5333 SDLoc DL(Op);
5334 bool IsWrite = Op.getConstantOperandVal(2);
5335 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5336 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5337 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5338 Op.getOperand(1)};
5339   return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5340                                  Node->getVTList(), Ops,
5341 Node->getMemoryVT(), Node->getMemOperand());
5342}
5343
5344SDValue
5345SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5346 SelectionDAG &DAG) const {
5347 unsigned Opcode, CCValid;
5348 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5349 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5350 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5351 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5352 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5353 return SDValue();
5354 }
5355
5356 return SDValue();
5357}
5358
5359SDValue
5360SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5361 SelectionDAG &DAG) const {
5362 unsigned Opcode, CCValid;
5363 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5364 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5365 if (Op->getNumValues() == 1)
5366 return getCCResult(DAG, SDValue(Node, 0));
5367 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5368 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5369 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5370 }
5371
5372 unsigned Id = Op.getConstantOperandVal(0);
5373 switch (Id) {
5374 case Intrinsic::thread_pointer:
5375 return lowerThreadPointer(SDLoc(Op), DAG);
5376
5377 case Intrinsic::s390_vpdi:
5378 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5379 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5380
5381 case Intrinsic::s390_vperm:
5382 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5383 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5384
5385 case Intrinsic::s390_vuphb:
5386 case Intrinsic::s390_vuphh:
5387 case Intrinsic::s390_vuphf:
5388 case Intrinsic::s390_vuphg:
5389 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5390 Op.getOperand(1));
5391
5392 case Intrinsic::s390_vuplhb:
5393 case Intrinsic::s390_vuplhh:
5394 case Intrinsic::s390_vuplhf:
5395 case Intrinsic::s390_vuplhg:
5396 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5397 Op.getOperand(1));
5398
5399 case Intrinsic::s390_vuplb:
5400 case Intrinsic::s390_vuplhw:
5401 case Intrinsic::s390_vuplf:
5402 case Intrinsic::s390_vuplg:
5403 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5404 Op.getOperand(1));
5405
5406 case Intrinsic::s390_vupllb:
5407 case Intrinsic::s390_vupllh:
5408 case Intrinsic::s390_vupllf:
5409 case Intrinsic::s390_vupllg:
5410 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5411 Op.getOperand(1));
5412
5413 case Intrinsic::s390_vsumb:
5414 case Intrinsic::s390_vsumh:
5415 case Intrinsic::s390_vsumgh:
5416 case Intrinsic::s390_vsumgf:
5417 case Intrinsic::s390_vsumqf:
5418 case Intrinsic::s390_vsumqg:
5419 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5420 Op.getOperand(1), Op.getOperand(2));
5421
5422 case Intrinsic::s390_vaq:
5423 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5424 Op.getOperand(1), Op.getOperand(2));
5425 case Intrinsic::s390_vaccb:
5426 case Intrinsic::s390_vacch:
5427 case Intrinsic::s390_vaccf:
5428 case Intrinsic::s390_vaccg:
5429 case Intrinsic::s390_vaccq:
5430 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5431 Op.getOperand(1), Op.getOperand(2));
5432 case Intrinsic::s390_vacq:
5433 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5434 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5435 case Intrinsic::s390_vacccq:
5436 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5437 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5438
5439 case Intrinsic::s390_vsq:
5440 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5441 Op.getOperand(1), Op.getOperand(2));
5442 case Intrinsic::s390_vscbib:
5443 case Intrinsic::s390_vscbih:
5444 case Intrinsic::s390_vscbif:
5445 case Intrinsic::s390_vscbig:
5446 case Intrinsic::s390_vscbiq:
5447 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5448 Op.getOperand(1), Op.getOperand(2));
5449 case Intrinsic::s390_vsbiq:
5450 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5451 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5452 case Intrinsic::s390_vsbcbiq:
5453 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5454 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5455
5456 case Intrinsic::s390_vmhb:
5457 case Intrinsic::s390_vmhh:
5458 case Intrinsic::s390_vmhf:
5459 case Intrinsic::s390_vmhg:
5460 case Intrinsic::s390_vmhq:
5461 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5462 Op.getOperand(1), Op.getOperand(2));
5463 case Intrinsic::s390_vmlhb:
5464 case Intrinsic::s390_vmlhh:
5465 case Intrinsic::s390_vmlhf:
5466 case Intrinsic::s390_vmlhg:
5467 case Intrinsic::s390_vmlhq:
5468 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5469 Op.getOperand(1), Op.getOperand(2));
5470
5471 case Intrinsic::s390_vmahb:
5472 case Intrinsic::s390_vmahh:
5473 case Intrinsic::s390_vmahf:
5474 case Intrinsic::s390_vmahg:
5475 case Intrinsic::s390_vmahq:
5476 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5477 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5478 case Intrinsic::s390_vmalhb:
5479 case Intrinsic::s390_vmalhh:
5480 case Intrinsic::s390_vmalhf:
5481 case Intrinsic::s390_vmalhg:
5482 case Intrinsic::s390_vmalhq:
5483 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5484 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5485
5486 case Intrinsic::s390_vmeb:
5487 case Intrinsic::s390_vmeh:
5488 case Intrinsic::s390_vmef:
5489 case Intrinsic::s390_vmeg:
5490 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5491 Op.getOperand(1), Op.getOperand(2));
5492 case Intrinsic::s390_vmleb:
5493 case Intrinsic::s390_vmleh:
5494 case Intrinsic::s390_vmlef:
5495 case Intrinsic::s390_vmleg:
5496 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5497 Op.getOperand(1), Op.getOperand(2));
5498 case Intrinsic::s390_vmob:
5499 case Intrinsic::s390_vmoh:
5500 case Intrinsic::s390_vmof:
5501 case Intrinsic::s390_vmog:
5502 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5503 Op.getOperand(1), Op.getOperand(2));
5504 case Intrinsic::s390_vmlob:
5505 case Intrinsic::s390_vmloh:
5506 case Intrinsic::s390_vmlof:
5507 case Intrinsic::s390_vmlog:
5508 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5509 Op.getOperand(1), Op.getOperand(2));
5510
5511 case Intrinsic::s390_vmaeb:
5512 case Intrinsic::s390_vmaeh:
5513 case Intrinsic::s390_vmaef:
5514 case Intrinsic::s390_vmaeg:
5515 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5516 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5517 Op.getOperand(1), Op.getOperand(2)),
5518 Op.getOperand(3));
5519 case Intrinsic::s390_vmaleb:
5520 case Intrinsic::s390_vmaleh:
5521 case Intrinsic::s390_vmalef:
5522 case Intrinsic::s390_vmaleg:
5523 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5524 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5525 Op.getOperand(1), Op.getOperand(2)),
5526 Op.getOperand(3));
5527 case Intrinsic::s390_vmaob:
5528 case Intrinsic::s390_vmaoh:
5529 case Intrinsic::s390_vmaof:
5530 case Intrinsic::s390_vmaog:
5531 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5532 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5533 Op.getOperand(1), Op.getOperand(2)),
5534 Op.getOperand(3));
5535 case Intrinsic::s390_vmalob:
5536 case Intrinsic::s390_vmaloh:
5537 case Intrinsic::s390_vmalof:
5538 case Intrinsic::s390_vmalog:
5539 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5540 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5541 Op.getOperand(1), Op.getOperand(2)),
5542 Op.getOperand(3));
5543 }
5544
5545 return SDValue();
5546}
5547
5548namespace {
5549// Says that SystemZISD operation Opcode can be used to perform the equivalent
5550// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5551// Operand is the constant third operand, otherwise it is the number of
5552// bytes in each element of the result.
5553struct Permute {
5554 unsigned Opcode;
5555 unsigned Operand;
5556 unsigned char Bytes[SystemZ::VectorBytes];
5557};
5558}
5559
5560static const Permute PermuteForms[] = {
5561  // VMRHG
5562  { SystemZISD::MERGE_HIGH, 8,
5563    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5564  // VMRHF
5565  { SystemZISD::MERGE_HIGH, 4,
5566    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5567  // VMRHH
5568  { SystemZISD::MERGE_HIGH, 2,
5569    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5570  // VMRHB
5571  { SystemZISD::MERGE_HIGH, 1,
5572    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5573  // VMRLG
5574  { SystemZISD::MERGE_LOW, 8,
5575    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5576  // VMRLF
5577  { SystemZISD::MERGE_LOW, 4,
5578    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5579  // VMRLH
5580  { SystemZISD::MERGE_LOW, 2,
5581    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5582  // VMRLB
5583  { SystemZISD::MERGE_LOW, 1,
5584    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5585  // VPKG
5586  { SystemZISD::PACK, 4,
5587    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5588  // VPKF
5589  { SystemZISD::PACK, 2,
5590    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5591  // VPKH
5592  { SystemZISD::PACK, 1,
5593    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5594  // VPDI V1, V2, 4 (low half of V1, high half of V2)
5595  { SystemZISD::PERMUTE_DWORDS, 4,
5596    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5597  // VPDI V1, V2, 1 (high half of V1, low half of V2)
5598  { SystemZISD::PERMUTE_DWORDS, 1,
5599    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5600};
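// Reading the table above (comment added for clarity): byte I of the result
// is taken from byte Bytes[I] of the 32-byte concatenation of the two
// operands, so e.g. the VMRHB entry interleaves the first eight bytes of
// operand 0 (indices 0-7) with the first eight bytes of operand 1 (16-23).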
5601
5602// Called after matching a vector shuffle against a particular pattern.
5603// Both the original shuffle and the pattern have two vector operands.
5604// OpNos[0] is the operand of the original shuffle that should be used for
5605// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5606// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5607// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5608// for operands 0 and 1 of the pattern.
5609static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5610 if (OpNos[0] < 0) {
5611 if (OpNos[1] < 0)
5612 return false;
5613 OpNo0 = OpNo1 = OpNos[1];
5614 } else if (OpNos[1] < 0) {
5615 OpNo0 = OpNo1 = OpNos[0];
5616 } else {
5617 OpNo0 = OpNos[0];
5618 OpNo1 = OpNos[1];
5619 }
5620 return true;
5621}
5622
5623// Bytes is a VPERM-like permute vector, except that -1 is used for
5624// undefined bytes. Return true if the VPERM can be implemented using P.
5625// When returning true set OpNo0 to the VPERM operand that should be
5626// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5627//
5628// For example, if swapping the VPERM operands allows P to match, OpNo0
5629// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5630// operand, but rewriting it to use two duplicated operands allows it to
5631// match P, then OpNo0 and OpNo1 will be the same.
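// For instance (added example): if Bytes equals a table entry's Bytes with
// every index XORed with 16 (i.e. the two source operands swapped), the low
// four bits of each index still agree, so the match succeeds with
// OpNo0 = 1 and OpNo1 = 0.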
5632static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5633 unsigned &OpNo0, unsigned &OpNo1) {
5634 int OpNos[] = { -1, -1 };
5635 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5636 int Elt = Bytes[I];
5637 if (Elt >= 0) {
5638 // Make sure that the two permute vectors use the same suboperand
5639 // byte number. Only the operand numbers (the high bits) are
5640 // allowed to differ.
5641 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5642 return false;
5643 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5644 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5645 // Make sure that the operand mappings are consistent with previous
5646 // elements.
5647 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5648 return false;
5649 OpNos[ModelOpNo] = RealOpNo;
5650 }
5651 }
5652 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5653}
5654
5655// As above, but search for a matching permute.
5656static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5657 unsigned &OpNo0, unsigned &OpNo1) {
5658 for (auto &P : PermuteForms)
5659 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5660 return &P;
5661 return nullptr;
5662}
5663
5664// Bytes is a VPERM-like permute vector, except that -1 is used for
5665// undefined bytes. This permute is an operand of an outer permute.
5666// See whether redistributing the -1 bytes gives a shuffle that can be
5667// implemented using P. If so, set Transform to a VPERM-like permute vector
5668// that, when applied to the result of P, gives the original permute in Bytes.
5669static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5670                               const Permute &P,
5671 SmallVectorImpl<int> &Transform) {
5672 unsigned To = 0;
5673 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5674 int Elt = Bytes[From];
5675 if (Elt < 0)
5676 // Byte number From of the result is undefined.
5677 Transform[From] = -1;
5678 else {
5679 while (P.Bytes[To] != Elt) {
5680 To += 1;
5681 if (To == SystemZ::VectorBytes)
5682 return false;
5683 }
5684 Transform[From] = To;
5685 }
5686 }
5687 return true;
5688}
5689
5690// As above, but search for a matching permute.
5691static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5692 SmallVectorImpl<int> &Transform) {
5693 for (auto &P : PermuteForms)
5694 if (matchDoublePermute(Bytes, P, Transform))
5695 return &P;
5696 return nullptr;
5697}
5698
5699// Convert the mask of the given shuffle op into a byte-level mask,
5700// as if it had type vNi8.
5701static bool getVPermMask(SDValue ShuffleOp,
5702 SmallVectorImpl<int> &Bytes) {
5703 EVT VT = ShuffleOp.getValueType();
5704 unsigned NumElements = VT.getVectorNumElements();
5705 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5706
5707 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5708 Bytes.resize(NumElements * BytesPerElement, -1);
5709 for (unsigned I = 0; I < NumElements; ++I) {
5710 int Index = VSN->getMaskElt(I);
5711 if (Index >= 0)
5712 for (unsigned J = 0; J < BytesPerElement; ++J)
5713 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5714 }
5715 return true;
5716 }
5717 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5718 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5719 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5720 Bytes.resize(NumElements * BytesPerElement, -1);
5721 for (unsigned I = 0; I < NumElements; ++I)
5722 for (unsigned J = 0; J < BytesPerElement; ++J)
5723 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5724 return true;
5725 }
5726 return false;
5727}
5728
5729// Bytes is a VPERM-like permute vector, except that -1 is used for
5730// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5731// the result come from a contiguous sequence of bytes from one input.
5732// Set Base to the selector for the first byte if so.
5733static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5734 unsigned BytesPerElement, int &Base) {
5735 Base = -1;
5736 for (unsigned I = 0; I < BytesPerElement; ++I) {
5737 if (Bytes[Start + I] >= 0) {
5738 unsigned Elem = Bytes[Start + I];
5739 if (Base < 0) {
5740 Base = Elem - I;
5741 // Make sure the bytes would come from one input operand.
5742 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5743 return false;
5744 } else if (unsigned(Base) != Elem - I)
5745 return false;
5746 }
5747 }
5748 return true;
5749}
5750
5751// Bytes is a VPERM-like permute vector, except that -1 is used for
5752// undefined bytes. Return true if it can be performed using VSLDB.
5753// When returning true, set StartIndex to the shift amount and OpNo0
5754// and OpNo1 to the VPERM operands that should be used as the first
5755// and second shift operand respectively.
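// Example (added): Bytes == { 1, 2, ..., 15, 16 } selects 16 consecutive
// bytes starting at offset 1 of the concatenation of the two operands, so it
// matches with StartIndex = 1, OpNo0 = 0 and OpNo1 = 1.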
5756static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5757                               unsigned &StartIndex, unsigned &OpNo0,
5758 unsigned &OpNo1) {
5759 int OpNos[] = { -1, -1 };
5760 int Shift = -1;
5761 for (unsigned I = 0; I < 16; ++I) {
5762 int Index = Bytes[I];
5763 if (Index >= 0) {
5764 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5765 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5766 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5767 if (Shift < 0)
5768 Shift = ExpectedShift;
5769 else if (Shift != ExpectedShift)
5770 return false;
5771 // Make sure that the operand mappings are consistent with previous
5772 // elements.
5773 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5774 return false;
5775 OpNos[ModelOpNo] = RealOpNo;
5776 }
5777 }
5778 StartIndex = Shift;
5779 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5780}
5781
5782// Create a node that performs P on operands Op0 and Op1, casting the
5783// operands to the appropriate type. The type of the result is determined by P.
5784static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5785                              const Permute &P, SDValue Op0, SDValue Op1) {
5786 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5787 // elements of a PACK are twice as wide as the outputs.
5788 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5789 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5790 P.Operand);
5791 // Cast both operands to the appropriate type.
5792 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5793 SystemZ::VectorBytes / InBytes);
5794 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5795 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5796 SDValue Op;
5797 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5798 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5799 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5800 } else if (P.Opcode == SystemZISD::PACK) {
5801 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5802 SystemZ::VectorBytes / P.Operand);
5803 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5804 } else {
5805 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5806 }
5807 return Op;
5808}
5809
5810static bool isZeroVector(SDValue N) {
5811 if (N->getOpcode() == ISD::BITCAST)
5812 N = N->getOperand(0);
5813 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5814 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5815 return Op->getZExtValue() == 0;
5816 return ISD::isBuildVectorAllZeros(N.getNode());
5817}
5818
5819// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5820static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5821 for (unsigned I = 0; I < Num ; I++)
5822 if (isZeroVector(Ops[I]))
5823 return I;
5824 return UINT32_MAX;
5825}
5826
5827// Bytes is a VPERM-like permute vector, except that -1 is used for
5828// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5829// VSLDB or VPERM.
5830static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5831                                     SDValue *Ops,
5832 const SmallVectorImpl<int> &Bytes) {
5833 for (unsigned I = 0; I < 2; ++I)
5834 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5835
5836 // First see whether VSLDB can be used.
5837 unsigned StartIndex, OpNo0, OpNo1;
5838 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5839 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5840 Ops[OpNo1],
5841 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5842
5843 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5844 // eliminate a zero vector by reusing any zero index in the permute vector.
5845 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5846 if (ZeroVecIdx != UINT32_MAX) {
5847 bool MaskFirst = true;
5848 int ZeroIdx = -1;
5849 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5850 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5851 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5852 if (OpNo == ZeroVecIdx && I == 0) {
5853 // If the first byte is zero, use mask as first operand.
5854 ZeroIdx = 0;
5855 break;
5856 }
5857 if (OpNo != ZeroVecIdx && Byte == 0) {
5858 // If mask contains a zero, use it by placing that vector first.
5859 ZeroIdx = I + SystemZ::VectorBytes;
5860 MaskFirst = false;
5861 break;
5862 }
5863 }
5864 if (ZeroIdx != -1) {
5865 SDValue IndexNodes[SystemZ::VectorBytes];
5866 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5867 if (Bytes[I] >= 0) {
5868 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5869 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5870 if (OpNo == ZeroVecIdx)
5871 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5872 else {
5873 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5874 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5875 }
5876 } else
5877 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5878 }
5879 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5880 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5881 if (MaskFirst)
5882 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5883 Mask);
5884 else
5885 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5886 Mask);
5887 }
5888 }
5889
5890 SDValue IndexNodes[SystemZ::VectorBytes];
5891 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5892 if (Bytes[I] >= 0)
5893 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5894 else
5895 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5896 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5897 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5898 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5899}
5900
5901namespace {
5902// Describes a general N-operand vector shuffle.
5903struct GeneralShuffle {
5904 GeneralShuffle(EVT vt)
5905 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5906 void addUndef();
5907 bool add(SDValue, unsigned);
5908 SDValue getNode(SelectionDAG &, const SDLoc &);
5909 void tryPrepareForUnpack();
5910 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5911 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5912
5913 // The operands of the shuffle.
5914   SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5915
5916 // Index I is -1 if byte I of the result is undefined. Otherwise the
5917 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5918 // Bytes[I] / SystemZ::VectorBytes.
5919   SmallVector<int, SystemZ::VectorBytes> Bytes;
5920
5921 // The type of the shuffle result.
5922 EVT VT;
5923
5924 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5925 unsigned UnpackFromEltSize;
5926 // True if the final unpack uses the low half.
5927 bool UnpackLow;
5928};
5929} // namespace
5930
5931// Add an extra undefined element to the shuffle.
5932void GeneralShuffle::addUndef() {
5933 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5934 for (unsigned I = 0; I < BytesPerElement; ++I)
5935 Bytes.push_back(-1);
5936}
5937
5938// Add an extra element to the shuffle, taking it from element Elem of Op.
5939// A null Op indicates a vector input whose value will be calculated later;
5940// there is at most one such input per shuffle and it always has the same
5941// type as the result. Aborts and returns false if the source vector elements
5942// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5943// LLVM they become implicitly extended, but this is rare and not optimized.
5944bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5945 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5946
5947 // The source vector can have wider elements than the result,
5948 // either through an explicit TRUNCATE or because of type legalization.
5949 // We want the least significant part.
5950 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5951 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5952
5953 // Return false if the source elements are smaller than their destination
5954 // elements.
5955 if (FromBytesPerElement < BytesPerElement)
5956 return false;
5957
5958 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5959 (FromBytesPerElement - BytesPerElement));
5960
5961 // Look through things like shuffles and bitcasts.
5962 while (Op.getNode()) {
5963 if (Op.getOpcode() == ISD::BITCAST)
5964 Op = Op.getOperand(0);
5965 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5966 // See whether the bytes we need come from a contiguous part of one
5967 // operand.
5968       SmallVector<int, SystemZ::VectorBytes> OpBytes;
5969       if (!getVPermMask(Op, OpBytes))
5970 break;
5971 int NewByte;
5972 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5973 break;
5974 if (NewByte < 0) {
5975 addUndef();
5976 return true;
5977 }
5978 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5979 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5980 } else if (Op.isUndef()) {
5981 addUndef();
5982 return true;
5983 } else
5984 break;
5985 }
5986
5987 // Make sure that the source of the extraction is in Ops.
5988 unsigned OpNo = 0;
5989 for (; OpNo < Ops.size(); ++OpNo)
5990 if (Ops[OpNo] == Op)
5991 break;
5992 if (OpNo == Ops.size())
5993 Ops.push_back(Op);
5994
5995 // Add the element to Bytes.
5996 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5997 for (unsigned I = 0; I < BytesPerElement; ++I)
5998 Bytes.push_back(Base + I);
5999
6000 return true;
6001}
6002
6003// Return SDNodes for the completed shuffle.
6004SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6005 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6006
6007 if (Ops.size() == 0)
6008 return DAG.getUNDEF(VT);
6009
6010 // Use a single unpack if possible as the last operation.
6011 tryPrepareForUnpack();
6012
6013 // Make sure that there are at least two shuffle operands.
6014 if (Ops.size() == 1)
6015 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6016
6017 // Create a tree of shuffles, deferring root node until after the loop.
6018 // Try to redistribute the undefined elements of non-root nodes so that
6019 // the non-root shuffles match something like a pack or merge, then adjust
6020 // the parent node's permute vector to compensate for the new order.
6021 // Among other things, this copes with vectors like <2 x i16> that were
6022 // padded with undefined elements during type legalization.
6023 //
6024 // In the best case this redistribution will lead to the whole tree
6025 // using packs and merges. It should rarely be a loss in other cases.
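  // For example (comment added, not in the original source): with four
  // operands the first pass combines Ops[0]/Ops[1] and Ops[2]/Ops[3] into
  // two intermediate shuffles, and the code after the loop emits the final
  // shuffle of those two results.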
6026 unsigned Stride = 1;
6027 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6028 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6029 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6030
6031 // Create a mask for just these two operands.
6032       SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6033       for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6034 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6035 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6036 if (OpNo == I)
6037 NewBytes[J] = Byte;
6038 else if (OpNo == I + Stride)
6039 NewBytes[J] = SystemZ::VectorBytes + Byte;
6040 else
6041 NewBytes[J] = -1;
6042 }
6043       // See if it would be better to reorganize NewBytes to avoid using VPERM.
6044       SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6045       if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6046 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6047 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6048 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6049 if (NewBytes[J] >= 0) {
6050 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6051 "Invalid double permute");
6052 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6053 } else
6054 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6055 }
6056 } else {
6057 // Just use NewBytes on the operands.
6058 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6059 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6060 if (NewBytes[J] >= 0)
6061 Bytes[J] = I * SystemZ::VectorBytes + J;
6062 }
6063 }
6064 }
6065
6066 // Now we just have 2 inputs. Put the second operand in Ops[1].
6067 if (Stride > 1) {
6068 Ops[1] = Ops[Stride];
6069 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6070 if (Bytes[I] >= int(SystemZ::VectorBytes))
6071 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6072 }
6073
6074 // Look for an instruction that can do the permute without resorting
6075 // to VPERM.
6076 unsigned OpNo0, OpNo1;
6077 SDValue Op;
6078 if (unpackWasPrepared() && Ops[1].isUndef())
6079 Op = Ops[0];
6080 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6081 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6082 else
6083 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6084
6085 Op = insertUnpackIfPrepared(DAG, DL, Op);
6086
6087 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6088}
6089
6090#ifndef NDEBUG
6091static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6092 dbgs() << Msg.c_str() << " { ";
6093 for (unsigned I = 0; I < Bytes.size(); I++)
6094 dbgs() << Bytes[I] << " ";
6095 dbgs() << "}\n";
6096}
6097#endif
6098
6099// If the Bytes vector matches an unpack operation, prepare to do the unpack
6100// after all else by removing the zero vector and the effect of the unpack on
6101// Bytes.
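// Example (added, not in the original source): a permute in which every
// even-numbered byte comes from the zero vector and the remaining bytes come
// in order from the first eight bytes of a non-zero operand matches
// UnpackFromEltSize == 1; the zero vector is dropped from Ops and a final
// byte-to-halfword logical unpack reintroduces the interleaved zero bytes.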
6102void GeneralShuffle::tryPrepareForUnpack() {
6103 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6104 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6105 return;
6106
6107 // Only do this if removing the zero vector reduces the depth, otherwise
6108 // the critical path will increase with the final unpack.
6109 if (Ops.size() > 2 &&
6110 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6111 return;
6112
6113 // Find an unpack that would allow removing the zero vector from Ops.
6114 UnpackFromEltSize = 1;
6115 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6116 bool MatchUnpack = true;
6117     SmallVector<int, SystemZ::VectorBytes / 2> SrcBytes;
6118     for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6119 unsigned ToEltSize = UnpackFromEltSize * 2;
6120 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6121 if (!IsZextByte)
6122 SrcBytes.push_back(Bytes[Elt]);
6123 if (Bytes[Elt] != -1) {
6124 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6125 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6126 MatchUnpack = false;
6127 break;
6128 }
6129 }
6130 }
6131 if (MatchUnpack) {
6132 if (Ops.size() == 2) {
6133 // Don't use unpack if a single source operand needs rearrangement.
6134 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6135 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6136 if (SrcBytes[i] == -1)
6137 continue;
6138 if (SrcBytes[i] % 16 != int(i))
6139 CanUseUnpackHigh = false;
6140 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6141 CanUseUnpackLow = false;
6142 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6143 UnpackFromEltSize = UINT_MAX;
6144 return;
6145 }
6146 }
6147 if (!CanUseUnpackHigh)
6148 UnpackLow = true;
6149 }
6150 break;
6151 }
6152 }
6153 if (UnpackFromEltSize > 4)
6154 return;
6155
6156 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6157 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6158 << ".\n";
6159 dumpBytes(Bytes, "Original Bytes vector:"););
6160
6161 // Apply the unpack in reverse to the Bytes array.
6162 unsigned B = 0;
6163 if (UnpackLow) {
6164 while (B < SystemZ::VectorBytes / 2)
6165 Bytes[B++] = -1;
6166 }
6167 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6168 Elt += UnpackFromEltSize;
6169 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6170 Bytes[B] = Bytes[Elt];
6171 }
6172 if (!UnpackLow) {
6173 while (B < SystemZ::VectorBytes)
6174 Bytes[B++] = -1;
6175 }
6176
6177 // Remove the zero vector from Ops
6178 Ops.erase(&Ops[ZeroVecOpNo]);
6179 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6180 if (Bytes[I] >= 0) {
6181 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6182 if (OpNo > ZeroVecOpNo)
6183 Bytes[I] -= SystemZ::VectorBytes;
6184 }
6185
6186 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6187 dbgs() << "\n";);
6188}
6189
6190SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6191 const SDLoc &DL,
6192 SDValue Op) {
6193 if (!unpackWasPrepared())
6194 return Op;
6195 unsigned InBits = UnpackFromEltSize * 8;
6196 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6197 SystemZ::VectorBits / InBits);
6198 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6199 unsigned OutBits = InBits * 2;
6200 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6201 SystemZ::VectorBits / OutBits);
6202 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6203                                : SystemZISD::UNPACKL_HIGH,
6204                      DL, OutVT, PackedOp);
6205}
6206
6207// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6208static bool isScalarToVector(SDValue Op) {
6209   for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6210 if (!Op.getOperand(I).isUndef())
6211 return false;
6212 return true;
6213}
6214
6215// Return a vector of type VT that contains Value in the first element.
6216// The other elements don't matter.
6217static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6218                                   SDValue Value) {
6219 // If we have a constant, replicate it to all elements and let the
6220 // BUILD_VECTOR lowering take care of it.
6221 if (Value.getOpcode() == ISD::Constant ||
6222 Value.getOpcode() == ISD::ConstantFP) {
6223     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6224     return DAG.getBuildVector(VT, DL, Ops);
6225 }
6226 if (Value.isUndef())
6227 return DAG.getUNDEF(VT);
6228 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6229}
6230
6231// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6232// element 1. Used for cases in which replication is cheap.
6234 SDValue Op0, SDValue Op1) {
6235 if (Op0.isUndef()) {
6236 if (Op1.isUndef())
6237 return DAG.getUNDEF(VT);
6238 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6239 }
6240 if (Op1.isUndef())
6241 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6242 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6243 buildScalarToVector(DAG, DL, VT, Op0),
6244 buildScalarToVector(DAG, DL, VT, Op1));
6245}
6246
6247// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6248// vector for them.
6249static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6250                          SDValue Op1) {
6251 if (Op0.isUndef() && Op1.isUndef())
6252 return DAG.getUNDEF(MVT::v2i64);
6253 // If one of the two inputs is undefined then replicate the other one,
6254 // in order to avoid using another register unnecessarily.
6255 if (Op0.isUndef())
6256 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6257 else if (Op1.isUndef())
6258 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6259 else {
6260 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6261 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6262 }
6263 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6264}
6265
6266// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6267// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6268// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6269// would benefit from this representation and return it if so.
6270static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6271                                     BuildVectorSDNode *BVN) {
6272 EVT VT = BVN->getValueType(0);
6273 unsigned NumElements = VT.getVectorNumElements();
6274
6275 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6276 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6277 // need a BUILD_VECTOR, add an additional placeholder operand for that
6278 // BUILD_VECTOR and store its operands in ResidueOps.
6279 GeneralShuffle GS(VT);
6280   SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6281   bool FoundOne = false;
6282 for (unsigned I = 0; I < NumElements; ++I) {
6283 SDValue Op = BVN->getOperand(I);
6284 if (Op.getOpcode() == ISD::TRUNCATE)
6285 Op = Op.getOperand(0);
6286 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6287 Op.getOperand(1).getOpcode() == ISD::Constant) {
6288 unsigned Elem = Op.getConstantOperandVal(1);
6289 if (!GS.add(Op.getOperand(0), Elem))
6290 return SDValue();
6291 FoundOne = true;
6292 } else if (Op.isUndef()) {
6293 GS.addUndef();
6294 } else {
6295 if (!GS.add(SDValue(), ResidueOps.size()))
6296 return SDValue();
6297 ResidueOps.push_back(BVN->getOperand(I));
6298 }
6299 }
6300
6301 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6302 if (!FoundOne)
6303 return SDValue();
6304
6305 // Create the BUILD_VECTOR for the remaining elements, if any.
6306 if (!ResidueOps.empty()) {
6307 while (ResidueOps.size() < NumElements)
6308 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6309 for (auto &Op : GS.Ops) {
6310 if (!Op.getNode()) {
6311 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6312 break;
6313 }
6314 }
6315 }
6316 return GS.getNode(DAG, SDLoc(BVN));
6317}
6318
6319bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6320 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6321 return true;
6322 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6323 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6324 return true;
6325 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6326 return true;
6327 return false;
6328}
6329
6330// Combine GPR scalar values Elems into a vector of type VT.
6331SDValue
6332SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6333 SmallVectorImpl<SDValue> &Elems) const {
6334 // See whether there is a single replicated value.
6335   SDValue Single;
6336   unsigned int NumElements = Elems.size();
6337 unsigned int Count = 0;
6338 for (auto Elem : Elems) {
6339 if (!Elem.isUndef()) {
6340 if (!Single.getNode())
6341 Single = Elem;
6342 else if (Elem != Single) {
6343 Single = SDValue();
6344 break;
6345 }
6346 Count += 1;
6347 }
6348 }
6349 // There are three cases here:
6350 //
6351 // - if the only defined element is a loaded one, the best sequence
6352 // is a replicating load.
6353 //
6354 // - otherwise, if the only defined element is an i64 value, we will
6355 // end up with the same VLVGP sequence regardless of whether we short-cut
6356 // for replication or fall through to the later code.
6357 //
6358 // - otherwise, if the only defined element is an i32 or smaller value,
6359 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6360 // This is only a win if the single defined element is used more than once.
6361 // In other cases we're better off using a single VLVGx.
6362 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6363 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6364
6365 // If all elements are loads, use VLREP/VLEs (below).
6366 bool AllLoads = true;
6367 for (auto Elem : Elems)
6368 if (!isVectorElementLoad(Elem)) {
6369 AllLoads = false;
6370 break;
6371 }
6372
6373 // The best way of building a v2i64 from two i64s is to use VLVGP.
6374 if (VT == MVT::v2i64 && !AllLoads)
6375 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6376
6377 // Use a 64-bit merge high to combine two doubles.
6378 if (VT == MVT::v2f64 && !AllLoads)
6379 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6380
6381 // Build v4f32 values directly from the FPRs:
6382 //
6383 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6384 // V V VMRHF
6385 // <ABxx> <CDxx>
6386 // V VMRHG
6387 // <ABCD>
6388 if (VT == MVT::v4f32 && !AllLoads) {
6389 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6390 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6391 // Avoid unnecessary undefs by reusing the other operand.
6392 if (Op01.isUndef())
6393 Op01 = Op23;
6394 else if (Op23.isUndef())
6395 Op23 = Op01;
6396 // Merging identical replications is a no-op.
6397 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6398 return Op01;
6399 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6400 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6401     SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6402                              DL, MVT::v2i64, Op01, Op23);
6403 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6404 }
6405
6406 // Collect the constant terms.
6407   SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6408   SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
6409
6410 unsigned NumConstants = 0;
6411 for (unsigned I = 0; I < NumElements; ++I) {
6412 SDValue Elem = Elems[I];
6413 if (Elem.getOpcode() == ISD::Constant ||
6414 Elem.getOpcode() == ISD::ConstantFP) {
6415 NumConstants += 1;
6416 Constants[I] = Elem;
6417 Done[I] = true;
6418 }
6419 }
6420 // If there was at least one constant, fill in the other elements of
6421 // Constants with undefs to get a full vector constant and use that
6422 // as the starting point.
6423   SDValue Result;
6424   SDValue ReplicatedVal;
6425 if (NumConstants > 0) {
6426 for (unsigned I = 0; I < NumElements; ++I)
6427 if (!Constants[I].getNode())
6428 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6429 Result = DAG.getBuildVector(VT, DL, Constants);
6430 } else {
6431 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6432 // avoid a false dependency on any previous contents of the vector
6433 // register.
6434
6435 // Use a VLREP if at least one element is a load. Make sure to replicate
6436 // the load with the most elements having its value.
6437 std::map<const SDNode*, unsigned> UseCounts;
6438 SDNode *LoadMaxUses = nullptr;
6439 for (unsigned I = 0; I < NumElements; ++I)
6440 if (isVectorElementLoad(Elems[I])) {
6441 SDNode *Ld = Elems[I].getNode();
6442 unsigned Count = ++UseCounts[Ld];
6443 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6444 LoadMaxUses = Ld;
6445 }
6446 if (LoadMaxUses != nullptr) {
6447 ReplicatedVal = SDValue(LoadMaxUses, 0);
6448 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6449 } else {
6450 // Try to use VLVGP.
6451 unsigned I1 = NumElements / 2 - 1;
6452 unsigned I2 = NumElements - 1;
6453 bool Def1 = !Elems[I1].isUndef();
6454 bool Def2 = !Elems[I2].isUndef();
6455 if (Def1 || Def2) {
6456 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6457 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6458 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6459 joinDwords(DAG, DL, Elem1, Elem2));
6460 Done[I1] = true;
6461 Done[I2] = true;
6462 } else
6463 Result = DAG.getUNDEF(VT);
6464 }
6465 }
6466
6467 // Use VLVGx to insert the other elements.
6468 for (unsigned I = 0; I < NumElements; ++I)
6469 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6470 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6471 DAG.getConstant(I, DL, MVT::i32));
6472 return Result;
6473}
6474
6475SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6476 SelectionDAG &DAG) const {
6477 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6478 SDLoc DL(Op);
6479 EVT VT = Op.getValueType();
6480
6481 if (BVN->isConstant()) {
6482 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6483 return Op;
6484
6485 // Fall back to loading it from memory.
6486 return SDValue();
6487 }
6488
6489 // See if we should use shuffles to construct the vector from other vectors.
6490 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6491 return Res;
6492
6493 // Detect SCALAR_TO_VECTOR conversions.
6494   if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
6495     return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6496
6497 // Otherwise use buildVector to build the vector up from GPRs.
6498 unsigned NumElements = Op.getNumOperands();
6499   SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6500   for (unsigned I = 0; I < NumElements; ++I)
6501 Ops[I] = Op.getOperand(I);
6502 return buildVector(DAG, DL, VT, Ops);
6503}
6504
6505SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6506 SelectionDAG &DAG) const {
6507 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6508 SDLoc DL(Op);
6509 EVT VT = Op.getValueType();
6510 unsigned NumElements = VT.getVectorNumElements();
6511
6512 if (VSN->isSplat()) {
6513 SDValue Op0 = Op.getOperand(0);
6514 unsigned Index = VSN->getSplatIndex();
6515 assert(Index < VT.getVectorNumElements() &&
6516 "Splat index should be defined and in first operand");
6517 // See whether the value we're splatting is directly available as a scalar.
6518 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6519         Op0.getOpcode() == SystemZISD::REPLICATE)
6520       return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6521 // Otherwise keep it as a vector-to-vector operation.
6522 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6523 DAG.getTargetConstant(Index, DL, MVT::i32));
6524 }
6525
6526 GeneralShuffle GS(VT);
6527 for (unsigned I = 0; I < NumElements; ++I) {
6528 int Elt = VSN->getMaskElt(I);
6529 if (Elt < 0)
6530 GS.addUndef();
6531 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6532 unsigned(Elt) % NumElements))
6533 return SDValue();
6534 }
6535 return GS.getNode(DAG, SDLoc(VSN));
6536}
6537
6538SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6539 SelectionDAG &DAG) const {
6540 SDLoc DL(Op);
6541 // Just insert the scalar into element 0 of an undefined vector.
6542 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6543 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6544 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6545}
6546
6547SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6548 SelectionDAG &DAG) const {
6549 // Handle insertions of floating-point values.
6550 SDLoc DL(Op);
6551 SDValue Op0 = Op.getOperand(0);
6552 SDValue Op1 = Op.getOperand(1);
6553 SDValue Op2 = Op.getOperand(2);
6554 EVT VT = Op.getValueType();
6555
6556 // Insertions into constant indices of a v2f64 can be done using VPDI.
6557 // However, if the inserted value is a bitcast or a constant then it's
6558 // better to use GPRs, as below.
6559 if (VT == MVT::v2f64 &&
6560 Op1.getOpcode() != ISD::BITCAST &&
6561 Op1.getOpcode() != ISD::ConstantFP &&
6562 Op2.getOpcode() == ISD::Constant) {
6563 uint64_t Index = Op2->getAsZExtVal();
6564 unsigned Mask = VT.getVectorNumElements() - 1;
6565 if (Index <= Mask)
6566 return Op;
6567 }
6568
6569 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6570 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6571 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6572 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6573 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6574 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6575 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6576}
6577
6578SDValue
6579SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6580 SelectionDAG &DAG) const {
6581 // Handle extractions of floating-point values.
6582 SDLoc DL(Op);
6583 SDValue Op0 = Op.getOperand(0);
6584 SDValue Op1 = Op.getOperand(1);
6585 EVT VT = Op.getValueType();
6586 EVT VecVT = Op0.getValueType();
6587
6588 // Extractions of constant indices can be done directly.
6589 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6590 uint64_t Index = CIndexN->getZExtValue();
6591 unsigned Mask = VecVT.getVectorNumElements() - 1;
6592 if (Index <= Mask)
6593 return Op;
6594 }
6595
6596 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6597 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6598 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6599 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6600 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6601 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6602}
6603
6604SDValue SystemZTargetLowering::
6605lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6606 SDValue PackedOp = Op.getOperand(0);
6607 EVT OutVT = Op.getValueType();
6608 EVT InVT = PackedOp.getValueType();
6609 unsigned ToBits = OutVT.getScalarSizeInBits();
6610 unsigned FromBits = InVT.getScalarSizeInBits();
6611 unsigned StartOffset = 0;
6612
6613 // If the input is a VECTOR_SHUFFLE, there are a number of important
6614 // cases where we can directly implement the sign-extension of the
6615 // original input lanes of the shuffle.
6616 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6617 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6618 ArrayRef<int> ShuffleMask = SVN->getMask();
6619 int OutNumElts = OutVT.getVectorNumElements();
6620
6621 // Recognize the special case where the sign-extension can be done
6622 // by the VSEG instruction. Handled via the default expander.
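    // (For example, sign-extending v4i32 to v2i64 this way uses the shuffle
    // mask <1, 3>, i.e. the low half of each doubleword.)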
6623 if (ToBits == 64 && OutNumElts == 2) {
6624 int NumElem = ToBits / FromBits;
6625 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6626 return SDValue();
6627 }
6628
6629 // Recognize the special case where we can fold the shuffle by
6630 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6631 int StartOffsetCandidate = -1;
6632 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6633 if (ShuffleMask[Elt] == -1)
6634 continue;
6635 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6636 if (StartOffsetCandidate == -1)
6637 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6638 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6639 continue;
6640 }
6641 StartOffsetCandidate = -1;
6642 break;
6643 }
6644 if (StartOffsetCandidate != -1) {
6645 StartOffset = StartOffsetCandidate;
6646 PackedOp = PackedOp.getOperand(0);
6647 }
6648 }
6649
6650 do {
6651 FromBits *= 2;
6652 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6653 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6654 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6655 if (StartOffset >= OutNumElts) {
6656 Opcode = SystemZISD::UNPACK_LOW;
6657 StartOffset -= OutNumElts;
6658 }
6659 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6660 } while (FromBits != ToBits);
6661 return PackedOp;
6662}
6663
6664// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6665SDValue SystemZTargetLowering::
6666lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6667 SDValue PackedOp = Op.getOperand(0);
6668 SDLoc DL(Op);
6669 EVT OutVT = Op.getValueType();
6670 EVT InVT = PackedOp.getValueType();
6671 unsigned InNumElts = InVT.getVectorNumElements();
6672 unsigned OutNumElts = OutVT.getVectorNumElements();
6673 unsigned NumInPerOut = InNumElts / OutNumElts;
6674
6675 SDValue ZeroVec =
6676 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6677
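  // Build a mask that interleaves zero-vector bytes with the input bytes.
  // For example, zero-extending v16i8 to v4i32 (NumInPerOut = 4) builds the
  // mask { 16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3 }: each output
  // element is three bytes of the zero vector followed by one input byte.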
6678 SmallVector<int, 16> Mask(InNumElts);
6679 unsigned ZeroVecElt = InNumElts;
6680 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6681 unsigned MaskElt = PackedElt * NumInPerOut;
6682 unsigned End = MaskElt + NumInPerOut - 1;
6683 for (; MaskElt < End; MaskElt++)
6684 Mask[MaskElt] = ZeroVecElt++;
6685 Mask[MaskElt] = PackedElt;
6686 }
6687 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6688 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6689}
6690
6691SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6692 unsigned ByScalar) const {
6693 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6694 SDValue Op0 = Op.getOperand(0);
6695 SDValue Op1 = Op.getOperand(1);
6696 SDLoc DL(Op);
6697 EVT VT = Op.getValueType();
6698 unsigned ElemBitSize = VT.getScalarSizeInBits();
6699
6700 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6701 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6702 APInt SplatBits, SplatUndef;
6703 unsigned SplatBitSize;
6704 bool HasAnyUndefs;
6705 // Check for constant splats. Use ElemBitSize as the minimum element
6706 // width and reject splats that need wider elements.
6707 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6708 ElemBitSize, true) &&
6709 SplatBitSize == ElemBitSize) {
6710 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6711 DL, MVT::i32);
6712 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6713 }
6714 // Check for variable splats.
6715 BitVector UndefElements;
6716 SDValue Splat = BVN->getSplatValue(&UndefElements);
6717 if (Splat) {
6718 // Since i32 is the smallest legal type, we either need a no-op
6719 // or a truncation.
6720 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6721 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6722 }
6723 }
6724
6725 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6726 // and the shift amount is directly available in a GPR.
6727 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6728 if (VSN->isSplat()) {
6729 SDValue VSNOp0 = VSN->getOperand(0);
6730 unsigned Index = VSN->getSplatIndex();
6731 assert(Index < VT.getVectorNumElements() &&
6732 "Splat index should be defined and in first operand");
6733 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6734 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6735 // Since i32 is the smallest legal type, we either need a no-op
6736 // or a truncation.
6737 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6738 VSNOp0.getOperand(Index));
6739 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6740 }
6741 }
6742 }
6743
6744 // Otherwise just treat the current form as legal.
6745 return Op;
6746}
6747
6748SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6749 SDLoc DL(Op);
6750
6751 // i128 FSHL with a constant amount that is a multiple of 8 can be
6752 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6753 // facility, FSHL with a constant amount less than 8 can be implemented
6754 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6755 // combination of the two.
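  // For example, with a shift amount of 20 the byte shuffle below selects
  // bytes 2..17 of the byte-wise concatenation Op0:Op1 (a 16-bit shift), and
  // the remaining 4 bits are handled by SHL_DOUBLE_BIT.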
6756 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6757 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6758 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6759 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6760 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6761 if (ShiftAmt > 120) {
6762 // For N in 121..127, fshl N == fshr (128 - N), and for 1 <= N < 8
6763 // SHR_DOUBLE_BIT emits fewer instructions.
6764 SDValue Val =
6765 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6766 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6767 return DAG.getBitcast(MVT::i128, Val);
6768 }
6769 SmallVector<int, 16> Mask(16);
6770 for (unsigned Elt = 0; Elt < 16; Elt++)
6771 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6772 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6773 if ((ShiftAmt & 7) == 0)
6774 return DAG.getBitcast(MVT::i128, Shuf1);
6775 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6776 SDValue Val =
6777 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6778 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6779 return DAG.getBitcast(MVT::i128, Val);
6780 }
6781 }
6782
6783 return SDValue();
6784}
6785
6786SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6787 SDLoc DL(Op);
6788
6789 // i128 FSHR with a constant amount that is a multiple of 8 can be
6790 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6791 // facility, FSHR with a constant amount less than 8 can be implemented
6792 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6793 // combination of the two.
6794 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6795 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6796 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6797 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6798 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6799 if (ShiftAmt > 120) {
6800 // For N in 121..127, fshr N == fshl (128 - N), and for 1 <= N < 8
6801 // SHL_DOUBLE_BIT emits fewer instructions.
6802 SDValue Val =
6803 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6804 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6805 return DAG.getBitcast(MVT::i128, Val);
6806 }
6807 SmallVector<int, 16> Mask(16);
6808 for (unsigned Elt = 0; Elt < 16; Elt++)
6809 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6810 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6811 if ((ShiftAmt & 7) == 0)
6812 return DAG.getBitcast(MVT::i128, Shuf1);
6813 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6814 SDValue Val =
6815 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6816 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6817 return DAG.getBitcast(MVT::i128, Val);
6818 }
6819 }
6820
6821 return SDValue();
6822}
6823
6824SDValue SystemZTargetLowering::lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) const {
6825 SDLoc DL(Op);
6826 SDValue Src = Op.getOperand(0);
6827 MVT DstVT = Op.getSimpleValueType();
6828
6829 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op);
6830 unsigned SrcAS = N->getSrcAddressSpace();
6831
6832 assert(SrcAS != N->getDestAddressSpace() &&
6833 "addrspacecast must be between different address spaces");
6834
6835 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6836 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6837 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6838 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6839 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6840 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6841 } else if (DstVT == MVT::i32) {
6842 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6843 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6844 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6845 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6846 } else {
6847 report_fatal_error("Bad address space in addrspacecast");
6848 }
6849 return Op;
6850}
6851
6852SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6853 SelectionDAG &DAG) const {
6854 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6855 if (In.getSimpleValueType() != MVT::f16)
6856 return Op; // Legal
6857 return SDValue(); // Let legalizer emit the libcall.
6858}
6859
6860SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
6861 MVT VT, SDValue Arg, SDLoc DL,
6862 SDValue Chain, bool IsStrict) const {
6863 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6864 MakeLibCallOptions CallOptions;
6865 SDValue Result;
6866 std::tie(Result, Chain) =
6867 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6868 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6869}
6870
6871SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6872 SelectionDAG &DAG) const {
6873 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
6874 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
6875 bool IsStrict = Op->isStrictFPOpcode();
6876 SDLoc DL(Op);
6877 MVT VT = Op.getSimpleValueType();
6878 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6879 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6880 EVT InVT = InOp.getValueType();
6881
6882 // FP to unsigned is not directly supported on z10. Promoting an i32
6883 // result to (signed) i64 doesn't generate an inexact condition (fp
6884 // exception) for values that are outside the i32 range but in the i64
6885 // range, so use the default expansion.
6886 if (!Subtarget.hasFPExtension() && !IsSigned)
6887 // Expand i32/i64. F16 values will be recognized to fit and extended.
6888 return SDValue();
6889
6890 // Conversion from f16 is done via f32.
6891 if (InOp.getSimpleValueType() == MVT::f16) {
6893 LowerOperationWrapper(Op.getNode(), Results, DAG);
6894 return DAG.getMergeValues(Results, DL);
6895 }
6896
6897 if (VT == MVT::i128) {
6898 RTLIB::Libcall LC =
6899 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
6900 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6901 }
6902
6903 return Op; // Legal
6904}
6905
6906SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
6907 SelectionDAG &DAG) const {
6908 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
6909 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
6910 bool IsStrict = Op->isStrictFPOpcode();
6911 SDLoc DL(Op);
6912 MVT VT = Op.getSimpleValueType();
6913 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6914 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6915 EVT InVT = InOp.getValueType();
6916
6917 // Conversion to f16 is done via f32.
6918 if (VT == MVT::f16) {
6920 LowerOperationWrapper(Op.getNode(), Results, DAG);
6921 return DAG.getMergeValues(Results, DL);
6922 }
6923
6924 // Unsigned to fp is not directly supported on z10.
6925 if (!Subtarget.hasFPExtension() && !IsSigned)
6926 return SDValue(); // Expand i64.
6927
6928 if (InVT == MVT::i128) {
6929 RTLIB::Libcall LC =
6930 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
6931 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6932 }
6933
6934 return Op; // Legal
6935}
6936
6937// Shift the lower 2 bytes of Op to the left in order to insert into the
6938// upper 2 bytes of the FP register.
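// For example, an i64 holding the f16 bits 0x3C00 (1.0) in its low half
// becomes 0x3C00000000000000 after the shift, so the f16 value can then be
// taken from subreg_h16 of the f64 register.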
6939static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
6940 assert(Op.getSimpleValueType() == MVT::i64 &&
6941 "Expected to convert i64 to f16.");
6942 SDLoc DL(Op);
6943 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6944 DAG.getConstant(48, DL, MVT::i64));
6945 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6946 SDValue F16Val =
6947 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6948 return F16Val;
6949}
6950
6951// Extract Op into a GPR and shift the 2 f16 bytes to the right.
6952static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
6953 assert(Op.getSimpleValueType() == MVT::f16 &&
6954 "Expected to convert f16 to i64.");
6955 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6956 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6957 SDValue(U32, 0), Op);
6958 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6959 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6960 DAG.getConstant(48, DL, MVT::i32));
6961 return Shft;
6962}
6963
6964// Lower an f16 LOAD in case of no vector support.
6965SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
6966 SelectionDAG &DAG) const {
6967 EVT RegVT = Op.getValueType();
6968 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
6969 (void)RegVT;
6970
6971 // Load as integer.
6972 SDLoc DL(Op);
6973 SDValue NewLd;
6974 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6975 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
6976 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
6977 AtomicLd->getChain(), AtomicLd->getBasePtr(),
6978 AtomicLd->getMemOperand());
6979 } else {
6980 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6981 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
6982 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
6983 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
6984 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6985 }
6986 SDValue F16Val = convertToF16(NewLd, DAG);
6987 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
6988}
6989
6990// Lower an f16 STORE in case of no vector support.
6991SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
6992 SelectionDAG &DAG) const {
6993 SDLoc DL(Op);
6994 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
6995
6996 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
6997 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
6998 Shft, AtomicSt->getBasePtr(),
6999 AtomicSt->getMemOperand());
7000
7001 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7002 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7003 St->getMemOperand());
7004}
7005
7006SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7007 SelectionDAG &DAG) const {
7008 SDLoc DL(Op);
7009 MVT ResultVT = Op.getSimpleValueType();
7010 SDValue Arg = Op.getOperand(0);
7011 unsigned Check = Op.getConstantOperandVal(1);
7012
7013 unsigned TDCMask = 0;
7014 if (Check & fcSNan)
7015 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
7016 if (Check & fcQNan)
7017 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
7018 if (Check & fcPosInf)
7019 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
7020 if (Check & fcNegInf)
7021 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
7022 if (Check & fcPosNormal)
7023 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
7024 if (Check & fcNegNormal)
7025 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
7026 if (Check & fcPosSubnormal)
7027 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
7028 if (Check & fcNegSubnormal)
7029 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
7030 if (Check & fcPosZero)
7031 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7032 if (Check & fcNegZero)
7033 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7034 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7035
7036 if (Arg.getSimpleValueType() == MVT::f16)
7037 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7038 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7039 return getCCResult(DAG, Intr);
7040}
7041
7042SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7043 SelectionDAG &DAG) const {
7044 SDLoc DL(Op);
7045 SDValue Chain = Op.getOperand(0);
7046
7047 // STCKF only supports a memory operand, so we have to use a temporary.
7048 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7049 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7050 MachinePointerInfo MPI =
7051 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
7052
7053 // Use STCKF to store the TOD clock into the temporary.
7054 SDValue StoreOps[] = {Chain, StackPtr};
7055 Chain = DAG.getMemIntrinsicNode(
7056 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7057 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7058
7059 // And read it back from there.
7060 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7061}
7062
7063SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
7064 SelectionDAG &DAG) const {
7065 switch (Op.getOpcode()) {
7066 case ISD::FRAMEADDR:
7067 return lowerFRAMEADDR(Op, DAG);
7068 case ISD::RETURNADDR:
7069 return lowerRETURNADDR(Op, DAG);
7070 case ISD::BR_CC:
7071 return lowerBR_CC(Op, DAG);
7072 case ISD::SELECT_CC:
7073 return lowerSELECT_CC(Op, DAG);
7074 case ISD::SETCC:
7075 return lowerSETCC(Op, DAG);
7076 case ISD::STRICT_FSETCC:
7077 return lowerSTRICT_FSETCC(Op, DAG, false);
7078 case ISD::STRICT_FSETCCS:
7079 return lowerSTRICT_FSETCC(Op, DAG, true);
7080 case ISD::GlobalAddress:
7081 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7082 case ISD::GlobalTLSAddress:
7083 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7084 case ISD::BlockAddress:
7085 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7086 case ISD::JumpTable:
7087 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7088 case ISD::ConstantPool:
7089 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7090 case ISD::BITCAST:
7091 return lowerBITCAST(Op, DAG);
7092 case ISD::VASTART:
7093 return lowerVASTART(Op, DAG);
7094 case ISD::VACOPY:
7095 return lowerVACOPY(Op, DAG);
7096 case ISD::DYNAMIC_STACKALLOC:
7097 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7098 case ISD::GET_DYNAMIC_AREA_OFFSET:
7099 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7100 case ISD::MULHS:
7101 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7102 case ISD::MULHU:
7103 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7104 case ISD::SMUL_LOHI:
7105 return lowerSMUL_LOHI(Op, DAG);
7106 case ISD::UMUL_LOHI:
7107 return lowerUMUL_LOHI(Op, DAG);
7108 case ISD::SDIVREM:
7109 return lowerSDIVREM(Op, DAG);
7110 case ISD::UDIVREM:
7111 return lowerUDIVREM(Op, DAG);
7112 case ISD::SADDO:
7113 case ISD::SSUBO:
7114 case ISD::UADDO:
7115 case ISD::USUBO:
7116 return lowerXALUO(Op, DAG);
7117 case ISD::UADDO_CARRY:
7118 case ISD::USUBO_CARRY:
7119 return lowerUADDSUBO_CARRY(Op, DAG);
7120 case ISD::OR:
7121 return lowerOR(Op, DAG);
7122 case ISD::CTPOP:
7123 return lowerCTPOP(Op, DAG);
7124 case ISD::VECREDUCE_ADD:
7125 return lowerVECREDUCE_ADD(Op, DAG);
7126 case ISD::ATOMIC_FENCE:
7127 return lowerATOMIC_FENCE(Op, DAG);
7128 case ISD::ATOMIC_SWAP:
7129 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7130 case ISD::ATOMIC_STORE:
7131 return lowerATOMIC_STORE(Op, DAG);
7132 case ISD::ATOMIC_LOAD:
7133 return lowerATOMIC_LOAD(Op, DAG);
7134 case ISD::ATOMIC_LOAD_ADD:
7135 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7136 case ISD::ATOMIC_LOAD_SUB:
7137 return lowerATOMIC_LOAD_SUB(Op, DAG);
7138 case ISD::ATOMIC_LOAD_AND:
7139 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7140 case ISD::ATOMIC_LOAD_OR:
7141 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7142 case ISD::ATOMIC_LOAD_XOR:
7143 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7144 case ISD::ATOMIC_LOAD_NAND:
7145 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7146 case ISD::ATOMIC_LOAD_MIN:
7147 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7148 case ISD::ATOMIC_LOAD_MAX:
7149 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7150 case ISD::ATOMIC_LOAD_UMIN:
7151 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7152 case ISD::ATOMIC_LOAD_UMAX:
7153 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7154 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7155 return lowerATOMIC_CMP_SWAP(Op, DAG);
7156 case ISD::STACKSAVE:
7157 return lowerSTACKSAVE(Op, DAG);
7158 case ISD::STACKRESTORE:
7159 return lowerSTACKRESTORE(Op, DAG);
7160 case ISD::PREFETCH:
7161 return lowerPREFETCH(Op, DAG);
7162 case ISD::INTRINSIC_W_CHAIN:
7163 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7164 case ISD::INTRINSIC_WO_CHAIN:
7165 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7166 case ISD::BUILD_VECTOR:
7167 return lowerBUILD_VECTOR(Op, DAG);
7168 case ISD::VECTOR_SHUFFLE:
7169 return lowerVECTOR_SHUFFLE(Op, DAG);
7170 case ISD::SCALAR_TO_VECTOR:
7171 return lowerSCALAR_TO_VECTOR(Op, DAG);
7172 case ISD::INSERT_VECTOR_ELT:
7173 return lowerINSERT_VECTOR_ELT(Op, DAG);
7174 case ISD::EXTRACT_VECTOR_ELT:
7175 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7176 case ISD::SIGN_EXTEND_VECTOR_INREG:
7177 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7178 case ISD::ZERO_EXTEND_VECTOR_INREG:
7179 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7180 case ISD::SHL:
7181 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7182 case ISD::SRL:
7183 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7184 case ISD::SRA:
7185 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7186 case ISD::ADDRSPACECAST:
7187 return lowerAddrSpaceCast(Op, DAG);
7188 case ISD::ROTL:
7189 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7190 case ISD::FSHL:
7191 return lowerFSHL(Op, DAG);
7192 case ISD::FSHR:
7193 return lowerFSHR(Op, DAG);
7194 case ISD::FP_EXTEND:
7195 case ISD::STRICT_FP_EXTEND:
7196 return lowerFP_EXTEND(Op, DAG);
7197 case ISD::FP_TO_UINT:
7198 case ISD::FP_TO_SINT:
7199 case ISD::STRICT_FP_TO_UINT:
7200 case ISD::STRICT_FP_TO_SINT:
7201 return lower_FP_TO_INT(Op, DAG);
7202 case ISD::UINT_TO_FP:
7203 case ISD::SINT_TO_FP:
7204 case ISD::STRICT_UINT_TO_FP:
7205 case ISD::STRICT_SINT_TO_FP:
7206 return lower_INT_TO_FP(Op, DAG);
7207 case ISD::LOAD:
7208 return lowerLoadF16(Op, DAG);
7209 case ISD::STORE:
7210 return lowerStoreF16(Op, DAG);
7211 case ISD::IS_FPCLASS:
7212 return lowerIS_FPCLASS(Op, DAG);
7213 case ISD::GET_ROUNDING:
7214 return lowerGET_ROUNDING(Op, DAG);
7215 case ISD::READCYCLECOUNTER:
7216 return lowerREADCYCLECOUNTER(Op, DAG);
7217 case ISD::EH_SJLJ_SETJMP:
7218 case ISD::EH_SJLJ_LONGJMP:
7219 // These operations are legal on our platform, but we cannot actually
7220 // set the operation action to Legal as common code would treat this
7221 // as equivalent to Expand. Instead, we keep the operation action at
7222 // Custom and just leave them unchanged here.
7223 return Op;
7224
7225 default:
7226 llvm_unreachable("Unexpected node to lower");
7227 }
7228}
7229
7230static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
7231 const SDLoc &SL) {
7232 // If i128 is legal, just use a normal bitcast.
7233 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7234 return DAG.getBitcast(MVT::f128, Src);
7235
7236 // Otherwise, f128 must live in FP128, so do a partwise move.
7237 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7238 &SystemZ::FP128BitRegClass);
7239
7240 SDValue Hi, Lo;
7241 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7242
7243 Hi = DAG.getBitcast(MVT::f64, Hi);
7244 Lo = DAG.getBitcast(MVT::f64, Lo);
7245
7246 SDNode *Pair = DAG.getMachineNode(
7247 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7248 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7249 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7250 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7251 return SDValue(Pair, 0);
7252}
7253
7254static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
7255 const SDLoc &SL) {
7256 // If i128 is legal, just use a normal bitcast.
7257 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7258 return DAG.getBitcast(MVT::i128, Src);
7259
7260 // Otherwise, f128 must live in FP128, so do a partwise move.
7261 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7262 &SystemZ::FP128BitRegClass);
7263
7264 SDValue LoFP =
7265 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7266 SDValue HiFP =
7267 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7268 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7269 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7270
7271 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7272}
7273
7274// Lower operations with invalid operand or result types.
7275void
7276SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
7277 SmallVectorImpl<SDValue> &Results,
7278 SelectionDAG &DAG) const {
7279 switch (N->getOpcode()) {
7280 case ISD::ATOMIC_LOAD: {
7281 SDLoc DL(N);
7282 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7283 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7284 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7285 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7286 DL, Tys, Ops, MVT::i128, MMO);
7287
7288 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7289 if (N->getValueType(0) == MVT::f128)
7290 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7291 Results.push_back(Lowered);
7292 Results.push_back(Res.getValue(1));
7293 break;
7294 }
7295 case ISD::ATOMIC_STORE: {
7296 SDLoc DL(N);
7297 SDVTList Tys = DAG.getVTList(MVT::Other);
7298 SDValue Val = N->getOperand(1);
7299 if (Val.getValueType() == MVT::f128)
7300 Val = expandBitCastF128ToI128(DAG, Val, DL);
7301 Val = lowerI128ToGR128(DAG, Val);
7302
7303 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7304 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7305 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7306 DL, Tys, Ops, MVT::i128, MMO);
7307 // We have to enforce sequential consistency by performing a
7308 // serialization operation after the store.
7309 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7310 AtomicOrdering::SequentiallyConsistent)
7311 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7312 MVT::Other, Res), 0);
7313 Results.push_back(Res);
7314 break;
7315 }
7316 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7317 SDLoc DL(N);
7318 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7319 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7320 lowerI128ToGR128(DAG, N->getOperand(2)),
7321 lowerI128ToGR128(DAG, N->getOperand(3)) };
7322 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7323 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7324 DL, Tys, Ops, MVT::i128, MMO);
7325 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7326 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
7327 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7328 Results.push_back(lowerGR128ToI128(DAG, Res));
7329 Results.push_back(Success);
7330 Results.push_back(Res.getValue(2));
7331 break;
7332 }
7333 case ISD::BITCAST: {
7334 if (useSoftFloat())
7335 return;
7336 SDLoc DL(N);
7337 SDValue Src = N->getOperand(0);
7338 EVT SrcVT = Src.getValueType();
7339 EVT ResVT = N->getValueType(0);
7340 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7341 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7342 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7343 if (Subtarget.hasVector()) {
7344 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7345 Results.push_back(SDValue(
7346 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7347 } else {
7348 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7349 Results.push_back(convertToF16(In64, DAG));
7350 }
7351 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7352 SDValue ExtractedVal =
7353 Subtarget.hasVector()
7354 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7355 0)
7356 : convertFromF16(Src, DL, DAG);
7357 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7358 }
7359 break;
7360 }
7361 case ISD::UINT_TO_FP:
7362 case ISD::SINT_TO_FP:
7363 case ISD::STRICT_UINT_TO_FP:
7364 case ISD::STRICT_SINT_TO_FP: {
7365 if (useSoftFloat())
7366 return;
7367 bool IsStrict = N->isStrictFPOpcode();
7368 SDLoc DL(N);
7369 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7370 EVT ResVT = N->getValueType(0);
7371 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7372 if (ResVT == MVT::f16) {
7373 if (!IsStrict) {
7374 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7375 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7376 } else {
7377 SDValue OpF32 =
7378 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7379 {Chain, InOp});
7380 SDValue F16Res;
7381 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7382 OpF32, OpF32.getValue(1), DL, MVT::f16);
7383 Results.push_back(F16Res);
7384 Results.push_back(Chain);
7385 }
7386 }
7387 break;
7388 }
7389 case ISD::FP_TO_UINT:
7390 case ISD::FP_TO_SINT:
7391 case ISD::STRICT_FP_TO_UINT:
7392 case ISD::STRICT_FP_TO_SINT: {
7393 if (useSoftFloat())
7394 return;
7395 bool IsStrict = N->isStrictFPOpcode();
7396 SDLoc DL(N);
7397 EVT ResVT = N->getValueType(0);
7398 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7399 EVT InVT = InOp->getValueType(0);
7400 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7401 if (InVT == MVT::f16) {
7402 if (!IsStrict) {
7403 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7404 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7405 } else {
7406 SDValue InF32;
7407 std::tie(InF32, Chain) =
7408 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7409 SDValue OpF32 =
7410 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7411 {Chain, InF32});
7412 Results.push_back(OpF32);
7413 Results.push_back(OpF32.getValue(1));
7414 }
7415 }
7416 break;
7417 }
7418 default:
7419 llvm_unreachable("Unexpected node to lower");
7420 }
7421}
7422
7423void
7424SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
7425 SmallVectorImpl<SDValue> &Results,
7426 SelectionDAG &DAG) const {
7427 return LowerOperationWrapper(N, Results, DAG);
7428}
7429
7430const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
7431#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
7432 switch ((SystemZISD::NodeType)Opcode) {
7433 case SystemZISD::FIRST_NUMBER: break;
7434 OPCODE(RET_GLUE);
7435 OPCODE(CALL);
7436 OPCODE(SIBCALL);
7437 OPCODE(TLS_GDCALL);
7438 OPCODE(TLS_LDCALL);
7439 OPCODE(PCREL_WRAPPER);
7440 OPCODE(PCREL_OFFSET);
7441 OPCODE(ICMP);
7442 OPCODE(FCMP);
7443 OPCODE(STRICT_FCMP);
7444 OPCODE(STRICT_FCMPS);
7445 OPCODE(TM);
7446 OPCODE(BR_CCMASK);
7447 OPCODE(SELECT_CCMASK);
7448 OPCODE(ADJDYNALLOC);
7449 OPCODE(PROBED_ALLOCA);
7450 OPCODE(POPCNT);
7451 OPCODE(SMUL_LOHI);
7452 OPCODE(UMUL_LOHI);
7453 OPCODE(SDIVREM);
7454 OPCODE(UDIVREM);
7455 OPCODE(SADDO);
7456 OPCODE(SSUBO);
7457 OPCODE(UADDO);
7458 OPCODE(USUBO);
7459 OPCODE(ADDCARRY);
7460 OPCODE(SUBCARRY);
7461 OPCODE(GET_CCMASK);
7462 OPCODE(MVC);
7463 OPCODE(NC);
7464 OPCODE(OC);
7465 OPCODE(XC);
7466 OPCODE(CLC);
7467 OPCODE(MEMSET_MVC);
7468 OPCODE(STPCPY);
7469 OPCODE(STRCMP);
7470 OPCODE(SEARCH_STRING);
7471 OPCODE(IPM);
7472 OPCODE(TBEGIN);
7473 OPCODE(TBEGIN_NOFLOAT);
7474 OPCODE(TEND);
7475 OPCODE(BYTE_MASK);
7476 OPCODE(ROTATE_MASK);
7477 OPCODE(REPLICATE);
7478 OPCODE(JOIN_DWORDS);
7479 OPCODE(SPLAT);
7480 OPCODE(MERGE_HIGH);
7481 OPCODE(MERGE_LOW);
7482 OPCODE(SHL_DOUBLE);
7483 OPCODE(PERMUTE_DWORDS);
7484 OPCODE(PERMUTE);
7485 OPCODE(PACK);
7486 OPCODE(PACKS_CC);
7487 OPCODE(PACKLS_CC);
7488 OPCODE(UNPACK_HIGH);
7489 OPCODE(UNPACKL_HIGH);
7490 OPCODE(UNPACK_LOW);
7491 OPCODE(UNPACKL_LOW);
7492 OPCODE(VSHL_BY_SCALAR);
7493 OPCODE(VSRL_BY_SCALAR);
7494 OPCODE(VSRA_BY_SCALAR);
7495 OPCODE(VROTL_BY_SCALAR);
7496 OPCODE(SHL_DOUBLE_BIT);
7497 OPCODE(SHR_DOUBLE_BIT);
7498 OPCODE(VSUM);
7499 OPCODE(VACC);
7500 OPCODE(VSCBI);
7501 OPCODE(VAC);
7502 OPCODE(VSBI);
7503 OPCODE(VACCC);
7504 OPCODE(VSBCBI);
7505 OPCODE(VMAH);
7506 OPCODE(VMALH);
7507 OPCODE(VME);
7508 OPCODE(VMLE);
7509 OPCODE(VMO);
7510 OPCODE(VMLO);
7511 OPCODE(VICMPE);
7512 OPCODE(VICMPH);
7513 OPCODE(VICMPHL);
7514 OPCODE(VICMPES);
7515 OPCODE(VICMPHS);
7516 OPCODE(VICMPHLS);
7517 OPCODE(VFCMPE);
7518 OPCODE(STRICT_VFCMPE);
7519 OPCODE(STRICT_VFCMPES);
7520 OPCODE(VFCMPH);
7521 OPCODE(STRICT_VFCMPH);
7522 OPCODE(STRICT_VFCMPHS);
7523 OPCODE(VFCMPHE);
7524 OPCODE(STRICT_VFCMPHE);
7525 OPCODE(STRICT_VFCMPHES);
7526 OPCODE(VFCMPES);
7527 OPCODE(VFCMPHS);
7528 OPCODE(VFCMPHES);
7529 OPCODE(VFTCI);
7530 OPCODE(VEXTEND);
7531 OPCODE(STRICT_VEXTEND);
7532 OPCODE(VROUND);
7533 OPCODE(STRICT_VROUND);
7534 OPCODE(VTM);
7535 OPCODE(SCMP128HI);
7536 OPCODE(UCMP128HI);
7537 OPCODE(VFAE_CC);
7538 OPCODE(VFAEZ_CC);
7539 OPCODE(VFEE_CC);
7540 OPCODE(VFEEZ_CC);
7541 OPCODE(VFENE_CC);
7542 OPCODE(VFENEZ_CC);
7543 OPCODE(VISTR_CC);
7544 OPCODE(VSTRC_CC);
7545 OPCODE(VSTRCZ_CC);
7546 OPCODE(VSTRS_CC);
7547 OPCODE(VSTRSZ_CC);
7548 OPCODE(TDC);
7549 OPCODE(ATOMIC_SWAPW);
7550 OPCODE(ATOMIC_LOADW_ADD);
7551 OPCODE(ATOMIC_LOADW_SUB);
7552 OPCODE(ATOMIC_LOADW_AND);
7553 OPCODE(ATOMIC_LOADW_OR);
7554 OPCODE(ATOMIC_LOADW_XOR);
7555 OPCODE(ATOMIC_LOADW_NAND);
7556 OPCODE(ATOMIC_LOADW_MIN);
7557 OPCODE(ATOMIC_LOADW_MAX);
7558 OPCODE(ATOMIC_LOADW_UMIN);
7559 OPCODE(ATOMIC_LOADW_UMAX);
7560 OPCODE(ATOMIC_CMP_SWAPW);
7561 OPCODE(ATOMIC_CMP_SWAP);
7562 OPCODE(ATOMIC_LOAD_128);
7563 OPCODE(ATOMIC_STORE_128);
7564 OPCODE(ATOMIC_CMP_SWAP_128);
7565 OPCODE(LRV);
7566 OPCODE(STRV);
7567 OPCODE(VLER);
7568 OPCODE(VSTER);
7569 OPCODE(STCKF);
7571 OPCODE(ADA_ENTRY);
7572 }
7573 return nullptr;
7574#undef OPCODE
7575}
7576
7577// Return true if VT is a vector whose elements are a whole number of bytes
7578// in width. Also check for presence of vector support.
7579bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7580 if (!Subtarget.hasVector())
7581 return false;
7582
7583 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7584}
7585
7586// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7587// producing a result of type ResVT. Op is a possibly bitcast version
7588// of the input vector and Index is the index (based on type VecVT) that
7589// should be extracted. Return the new extraction if a simplification
7590// was possible or if Force is true.
7591SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7592 EVT VecVT, SDValue Op,
7593 unsigned Index,
7594 DAGCombinerInfo &DCI,
7595 bool Force) const {
7596 SelectionDAG &DAG = DCI.DAG;
7597
7598 // The number of bytes being extracted.
7599 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7600
7601 for (;;) {
7602 unsigned Opcode = Op.getOpcode();
7603 if (Opcode == ISD::BITCAST)
7604 // Look through bitcasts.
7605 Op = Op.getOperand(0);
7606 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7607 canTreatAsByteVector(Op.getValueType())) {
7608 // Get a VPERM-like permute mask and see whether the bytes covered
7609 // by the extracted element are a contiguous sequence from one
7610 // source operand.
7611 SmallVector<int, SystemZ::VectorBytes> Bytes;
7612 if (!getVPermMask(Op, Bytes))
7613 break;
7614 int First;
7615 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7616 BytesPerElement, First))
7617 break;
7618 if (First < 0)
7619 return DAG.getUNDEF(ResVT);
7620 // Make sure the contiguous sequence starts at a multiple of the
7621 // original element size.
7622 unsigned Byte = unsigned(First) % Bytes.size();
7623 if (Byte % BytesPerElement != 0)
7624 break;
7625 // We can get the extracted value directly from an input.
7626 Index = Byte / BytesPerElement;
7627 Op = Op.getOperand(unsigned(First) / Bytes.size());
7628 Force = true;
7629 } else if (Opcode == ISD::BUILD_VECTOR &&
7630 canTreatAsByteVector(Op.getValueType())) {
7631 // We can only optimize this case if the BUILD_VECTOR elements are
7632 // at least as wide as the extracted value.
7633 EVT OpVT = Op.getValueType();
7634 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7635 if (OpBytesPerElement < BytesPerElement)
7636 break;
7637 // Make sure that the least-significant bit of the extracted value
7638 // is the least significant bit of an input.
7639 unsigned End = (Index + 1) * BytesPerElement;
7640 if (End % OpBytesPerElement != 0)
7641 break;
7642 // We're extracting the low part of one operand of the BUILD_VECTOR.
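      // For example, extracting i16 element 3 from a v8i16 view of a v4i32
      // BUILD_VECTOR gives End == 8, so the value is the low part of
      // operand 8 / 4 - 1 == 1.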
7643 Op = Op.getOperand(End / OpBytesPerElement - 1);
7644 if (!Op.getValueType().isInteger()) {
7645 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7646 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7647 DCI.AddToWorklist(Op.getNode());
7648 }
7649 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7650 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7651 if (VT != ResVT) {
7652 DCI.AddToWorklist(Op.getNode());
7653 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7654 }
7655 return Op;
7656 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7657 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7658 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7659 canTreatAsByteVector(Op.getValueType()) &&
7660 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7661 // Make sure that only the unextended bits are significant.
7662 EVT ExtVT = Op.getValueType();
7663 EVT OpVT = Op.getOperand(0).getValueType();
7664 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7665 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7666 unsigned Byte = Index * BytesPerElement;
7667 unsigned SubByte = Byte % ExtBytesPerElement;
7668 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7669 if (SubByte < MinSubByte ||
7670 SubByte + BytesPerElement > ExtBytesPerElement)
7671 break;
7672 // Get the byte offset of the unextended element
7673 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7674 // ...then add the byte offset relative to that element.
7675 Byte += SubByte - MinSubByte;
7676 if (Byte % BytesPerElement != 0)
7677 break;
7678 Op = Op.getOperand(0);
7679 Index = Byte / BytesPerElement;
7680 Force = true;
7681 } else
7682 break;
7683 }
7684 if (Force) {
7685 if (Op.getValueType() != VecVT) {
7686 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7687 DCI.AddToWorklist(Op.getNode());
7688 }
7689 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7690 DAG.getConstant(Index, DL, MVT::i32));
7691 }
7692 return SDValue();
7693}
7694
7695// Optimize vector operations in scalar value Op on the basis that Op
7696// is truncated to TruncVT.
7697SDValue SystemZTargetLowering::combineTruncateExtract(
7698 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7699 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7700 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7701 // of type TruncVT.
7702 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7703 TruncVT.getSizeInBits() % 8 == 0) {
7704 SDValue Vec = Op.getOperand(0);
7705 EVT VecVT = Vec.getValueType();
7706 if (canTreatAsByteVector(VecVT)) {
7707 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7708 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7709 unsigned TruncBytes = TruncVT.getStoreSize();
7710 if (BytesPerElement % TruncBytes == 0) {
7711 // Calculate the value of Y' in the above description. We are
7712 // splitting the original elements into Scale equal-sized pieces
7713 // and for truncation purposes want the last (least-significant)
7714 // of these pieces for IndexN. This is easiest to do by calculating
7715 // the start index of the following element and then subtracting 1.
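          // For example, truncating i32 element 1 to i8 gives Scale == 4 and
          // NewIndex == 7, i.e. the last byte of that element in the v16i8
          // view of the vector.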
7716 unsigned Scale = BytesPerElement / TruncBytes;
7717 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7718
7719 // Defer the creation of the bitcast from X to combineExtract,
7720 // which might be able to optimize the extraction.
7721 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7722 MVT::getIntegerVT(TruncBytes * 8),
7723 VecVT.getStoreSize() / TruncBytes);
7724 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7725 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7726 }
7727 }
7728 }
7729 }
7730 return SDValue();
7731}
7732
7733SDValue SystemZTargetLowering::combineZERO_EXTEND(
7734 SDNode *N, DAGCombinerInfo &DCI) const {
7735 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7736 SelectionDAG &DAG = DCI.DAG;
7737 SDValue N0 = N->getOperand(0);
7738 EVT VT = N->getValueType(0);
7739 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7740 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7741 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7742 if (TrueOp && FalseOp) {
7743 SDLoc DL(N0);
7744 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7745 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7746 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7747 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7748 // If N0 has multiple uses, change other uses as well.
7749 if (!N0.hasOneUse()) {
7750 SDValue TruncSelect =
7751 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7752 DCI.CombineTo(N0.getNode(), TruncSelect);
7753 }
7754 return NewSelect;
7755 }
7756 }
7757 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7758 // of the result is smaller than the size of X and all the truncated bits
7759 // of X are already zero.
7760 if (N0.getOpcode() == ISD::XOR &&
7761 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7762 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7763 N0.getOperand(1).getOpcode() == ISD::Constant) {
7764 SDValue X = N0.getOperand(0).getOperand(0);
7765 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7766 KnownBits Known = DAG.computeKnownBits(X);
7767 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7768 N0.getValueSizeInBits(),
7769 VT.getSizeInBits());
7770 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7771 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7772 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7773 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7774 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7775 }
7776 }
7777 }
7778 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7779 // and VECTOR ADD COMPUTE CARRY for i128:
7780 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7781 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7782 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7783 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7784 // For vector types, these patterns are recognized in the .td file.
7785 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7786 N0.getOperand(0).getValueType() == VT) {
7787 SDValue Op0 = N0.getOperand(0);
7788 SDValue Op1 = N0.getOperand(1);
7789 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7790 switch (CC) {
7791 case ISD::SETULE:
7792 std::swap(Op0, Op1);
7793 [[fallthrough]];
7794 case ISD::SETUGE:
7795 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7796 case ISD::SETUGT:
7797 std::swap(Op0, Op1);
7798 [[fallthrough]];
7799 case ISD::SETULT:
7800 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7801 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7802 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7803 Op0->getOperand(1));
7804 break;
7805 default:
7806 break;
7807 }
7808 }
7809
7810 return SDValue();
7811}
7812
7813SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7814 SDNode *N, DAGCombinerInfo &DCI) const {
7815 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7816 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7817 // into (select_cc LHS, RHS, -1, 0, COND)
7818 SelectionDAG &DAG = DCI.DAG;
7819 SDValue N0 = N->getOperand(0);
7820 EVT VT = N->getValueType(0);
7821 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7822 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7823 N0 = N0.getOperand(0);
7824 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7825 SDLoc DL(N0);
7826 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7827 DAG.getAllOnesConstant(DL, VT),
7828 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7829 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7830 }
7831 return SDValue();
7832}
7833
7834SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7835 SDNode *N, DAGCombinerInfo &DCI) const {
7836 // Convert (sext (ashr (shl X, C1), C2)) to
7837 // (ashr (shl (anyext X), C1'), C2'), since wider shifts are as
7838 // cheap as narrower ones.
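  // For example, (sext to i64 of (ashr (shl X, 24), 24)) with an i32 X
  // becomes (ashr (shl (anyext X), 56), 56), since Extra == 32.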
7839 SelectionDAG &DAG = DCI.DAG;
7840 SDValue N0 = N->getOperand(0);
7841 EVT VT = N->getValueType(0);
7842 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7843 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7844 SDValue Inner = N0.getOperand(0);
7845 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7846 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7847 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7848 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7849 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7850 EVT ShiftVT = N0.getOperand(1).getValueType();
7851 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7852 Inner.getOperand(0));
7853 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7854 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7855 ShiftVT));
7856 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7857 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7858 }
7859 }
7860 }
7861
7862 return SDValue();
7863}
7864
7865SDValue SystemZTargetLowering::combineMERGE(
7866 SDNode *N, DAGCombinerInfo &DCI) const {
7867 SelectionDAG &DAG = DCI.DAG;
7868 unsigned Opcode = N->getOpcode();
7869 SDValue Op0 = N->getOperand(0);
7870 SDValue Op1 = N->getOperand(1);
7871 if (Op0.getOpcode() == ISD::BITCAST)
7872 Op0 = Op0.getOperand(0);
7873 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7874 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7875 // for v4f32.
7876 if (Op1 == N->getOperand(0))
7877 return Op1;
7878 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7879 EVT VT = Op1.getValueType();
7880 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7881 if (ElemBytes <= 4) {
7882 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7883 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7884 EVT InVT = VT.changeVectorElementTypeToInteger();
7885 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7886 SystemZ::VectorBytes / ElemBytes / 2);
7887 if (VT != InVT) {
7888 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7889 DCI.AddToWorklist(Op1.getNode());
7890 }
7891 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7892 DCI.AddToWorklist(Op.getNode());
7893 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7894 }
7895 }
7896 return SDValue();
7897}
7898
7899static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7900 SDNode *&HiPart) {
7901 LoPart = HiPart = nullptr;
7902
7903 // Scan through all users.
7904 for (SDUse &Use : LD->uses()) {
7905 // Skip the uses of the chain.
7906 if (Use.getResNo() != 0)
7907 continue;
7908
7909 // Verify every user is a TRUNCATE to i64 of the low or high half.
7910 SDNode *User = Use.getUser();
7911 bool IsLoPart = true;
7912 if (User->getOpcode() == ISD::SRL &&
7913 User->getOperand(1).getOpcode() == ISD::Constant &&
7914 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7915 User = *User->user_begin();
7916 IsLoPart = false;
7917 }
7918 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7919 return false;
7920
7921 if (IsLoPart) {
7922 if (LoPart)
7923 return false;
7924 LoPart = User;
7925 } else {
7926 if (HiPart)
7927 return false;
7928 HiPart = User;
7929 }
7930 }
7931 return true;
7932}
7933
7934static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7935 SDNode *&HiPart) {
7936 LoPart = HiPart = nullptr;
7937
7938 // Scan through all users.
7939 for (SDUse &Use : LD->uses()) {
7940 // Skip the uses of the chain.
7941 if (Use.getResNo() != 0)
7942 continue;
7943
7944 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7945 SDNode *User = Use.getUser();
7946 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7947 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7948 return false;
7949
7950 switch (User->getConstantOperandVal(1)) {
7951 case SystemZ::subreg_l64:
7952 if (LoPart)
7953 return false;
7954 LoPart = User;
7955 break;
7956 case SystemZ::subreg_h64:
7957 if (HiPart)
7958 return false;
7959 HiPart = User;
7960 break;
7961 default:
7962 return false;
7963 }
7964 }
7965 return true;
7966}
7967
7968SDValue SystemZTargetLowering::combineLOAD(
7969 SDNode *N, DAGCombinerInfo &DCI) const {
7970 SelectionDAG &DAG = DCI.DAG;
7971 EVT LdVT = N->getValueType(0);
7972 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7973 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7974 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7975 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7976 if (PtrVT != LoadNodeVT) {
7977 SDLoc DL(LN);
7978 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7979 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7980 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7981 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7982 LN->getMemOperand());
7983 }
7984 }
7985 }
7986 SDLoc DL(N);
7987
7988 // Replace a 128-bit load that is used solely to move its value into GPRs
7989 // by separate loads of both halves.
7990 LoadSDNode *LD = cast<LoadSDNode>(N);
7991 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7992 SDNode *LoPart, *HiPart;
7993 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7994 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7995 // Rewrite each extraction as an independent load.
7996 SmallVector<SDValue, 2> ArgChains;
7997 if (HiPart) {
7998 SDValue EltLoad = DAG.getLoad(
7999 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
8000 LD->getPointerInfo(), LD->getBaseAlign(),
8001 LD->getMemOperand()->getFlags(), LD->getAAInfo());
8002
8003 DCI.CombineTo(HiPart, EltLoad, true);
8004 ArgChains.push_back(EltLoad.getValue(1));
8005 }
8006 if (LoPart) {
8007 SDValue EltLoad = DAG.getLoad(
8008 LoPart->getValueType(0), DL, LD->getChain(),
8009 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
8010 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
8011 LD->getMemOperand()->getFlags(), LD->getAAInfo());
8012
8013 DCI.CombineTo(LoPart, EltLoad, true);
8014 ArgChains.push_back(EltLoad.getValue(1));
8015 }
8016
8017 // Collect all chains via TokenFactor.
8018 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
8019 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
8020 DCI.AddToWorklist(Chain.getNode());
8021 return SDValue(N, 0);
8022 }
8023 }
8024
8025 if (LdVT.isVector() || LdVT.isInteger())
8026 return SDValue();
8027 // Transform a scalar load that is REPLICATEd as well as having other
8028 // use(s) to the form where the other use(s) use the first element of the
8029 // REPLICATE instead of the load. Otherwise instruction selection will not
8030 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
8031 // point loads.
8032
8033 SDValue Replicate;
8034 SmallVector<SDNode*, 8> OtherUses;
8035 for (SDUse &Use : N->uses()) {
8036 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
8037 if (Replicate)
8038 return SDValue(); // Should never happen
8039 Replicate = SDValue(Use.getUser(), 0);
8040 } else if (Use.getResNo() == 0)
8041 OtherUses.push_back(Use.getUser());
8042 }
8043 if (!Replicate || OtherUses.empty())
8044 return SDValue();
8045
8046 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
8047 Replicate, DAG.getConstant(0, DL, MVT::i32));
8048 // Update uses of the loaded Value while preserving old chains.
8049 for (SDNode *U : OtherUses) {
8051 for (SDValue Op : U->ops())
8052 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
8053 DAG.UpdateNodeOperands(U, Ops);
8054 }
8055 return SDValue(N, 0);
8056}
8057
8058bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
8059 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
8060 return true;
8061 if (Subtarget.hasVectorEnhancements2())
8062 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
8063 return true;
8064 return false;
8065}
8066
8067static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
8068 if (!VT.isVector() || !VT.isSimple() ||
8069 VT.getSizeInBits() != 128 ||
8070 VT.getScalarSizeInBits() % 8 != 0)
8071 return false;
8072
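  // The mask must reverse the element order, e.g. <3, 2, 1, 0> for v4i32;
  // UNDEF entries are ignored.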
8073 unsigned NumElts = VT.getVectorNumElements();
8074 for (unsigned i = 0; i < NumElts; ++i) {
8075 if (M[i] < 0) continue; // ignore UNDEF indices
8076 if ((unsigned) M[i] != NumElts - 1 - i)
8077 return false;
8078 }
8079
8080 return true;
8081}
8082
8083static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8084 for (auto *U : StoredVal->users()) {
8085 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
8086 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8087 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8088 continue;
8089 } else if (isa<BuildVectorSDNode>(U)) {
8090 SDValue BuildVector = SDValue(U, 0);
8091 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
8092 isOnlyUsedByStores(BuildVector, DAG))
8093 continue;
8094 }
8095 return false;
8096 }
8097 return true;
8098}
8099
8100static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8101 SDValue &HiPart) {
8102 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8103 return false;
8104
8105 SDValue Op0 = Val.getOperand(0);
8106 SDValue Op1 = Val.getOperand(1);
8107
8108 if (Op0.getOpcode() == ISD::SHL)
8109 std::swap(Op0, Op1);
8110 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8111 Op1.getOperand(1).getOpcode() != ISD::Constant ||
8112 Op1.getConstantOperandVal(1) != 64)
8113 return false;
8114 Op1 = Op1.getOperand(0);
8115
8116 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8117 Op0.getOperand(0).getValueType() != MVT::i64)
8118 return false;
8119 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8120 Op1.getOperand(0).getValueType() != MVT::i64)
8121 return false;
8122
8123 LoPart = Op0.getOperand(0);
8124 HiPart = Op1.getOperand(0);
8125 return true;
8126}
8127
8128static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8129 SDValue &HiPart) {
8130 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8131 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8132 return false;
8133
8134 if (Val->getNumOperands() != 5 ||
8135 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8136 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8137 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8138 return false;
8139
8140 LoPart = Val->getOperand(1);
8141 HiPart = Val->getOperand(3);
8142 return true;
8143}
8144
8145SDValue SystemZTargetLowering::combineSTORE(
8146 SDNode *N, DAGCombinerInfo &DCI) const {
8147 SelectionDAG &DAG = DCI.DAG;
8148 auto *SN = cast<StoreSDNode>(N);
8149 auto &Op1 = N->getOperand(1);
8150 EVT MemVT = SN->getMemoryVT();
8151
8152 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8153 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8154 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8155 if (PtrVT != StoreNodeVT) {
8156 SDLoc DL(SN);
8157 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8158 SYSTEMZAS::PTR32, 0);
8159 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8160 SN->getPointerInfo(), SN->getBaseAlign(),
8161 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8162 }
8163 }
8164
8165 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8166 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8167 // If X has wider elements then convert it to:
8168 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8169 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8170 if (SDValue Value =
8171 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8172 DCI.AddToWorklist(Value.getNode());
8173
8174 // Rewrite the store with the new form of stored value.
8175 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8176 SN->getBasePtr(), SN->getMemoryVT(),
8177 SN->getMemOperand());
8178 }
8179 }
8180 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8181 if (!SN->isTruncatingStore() &&
8182 Op1.getOpcode() == ISD::BSWAP &&
8183 Op1.getNode()->hasOneUse() &&
8184 canLoadStoreByteSwapped(Op1.getValueType())) {
8185
8186 SDValue BSwapOp = Op1.getOperand(0);
8187
8188 if (BSwapOp.getValueType() == MVT::i16)
8189 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8190
8191 SDValue Ops[] = {
8192 N->getOperand(0), BSwapOp, N->getOperand(2)
8193 };
8194
8195 return
8196 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8197 Ops, MemVT, SN->getMemOperand());
8198 }
8199 // Combine STORE (element-swap) into VSTER
8200 if (!SN->isTruncatingStore() &&
8201 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8202 Op1.getNode()->hasOneUse() &&
8203 Subtarget.hasVectorEnhancements2()) {
8204 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8205 ArrayRef<int> ShuffleMask = SVN->getMask();
8206 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8207 SDValue Ops[] = {
8208 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8209 };
8210
8211 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8212 DAG.getVTList(MVT::Other),
8213 Ops, MemVT, SN->getMemOperand());
8214 }
8215 }
8216
8217 // Combine STORE (READCYCLECOUNTER) into STCKF.
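  // For example, storing the result of llvm.readcyclecounter straight to
  // memory can be done with a single STCKF to that address, provided the
  // store's chain reaches the counter read without intervening side effects.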
8218 if (!SN->isTruncatingStore() &&
8219 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8220 Op1.hasOneUse() &&
8221 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8222 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8223 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8224 DAG.getVTList(MVT::Other),
8225 Ops, MemVT, SN->getMemOperand());
8226 }
8227
8228 // Transform a store of a 128-bit value moved from parts into two stores.
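  // For example, an i128 value assembled from two i64 halves is stored as two
  // 8-byte stores: the high part at offset 0 and the low part at offset 8
  // (big-endian layout).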
8229 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8230 SDValue LoPart, HiPart;
8231 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8232 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8233 SDLoc DL(SN);
8234 SDValue Chain0 = DAG.getStore(
8235 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8236 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8237 SDValue Chain1 = DAG.getStore(
8238 SN->getChain(), DL, LoPart,
8239 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8240 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8241 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8242
8243 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8244 }
8245 }
8246
8247 // Replicate a reg or immediate with VREP instead of scalar multiply or
8248 // immediate load. It seems best to do this during the first DAGCombine as
8249 // it is straightforward to handle the zero-extend node in the initial
8250 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
8251 // extracting an i16 element from a v16i8 vector).
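  // For example, storing the i64 immediate 0x00ab00ab00ab00ab can be done by
  // replicating 0x00ab across halfword elements (VREPI) and storing 8 bytes of
  // that vector, instead of materializing the full constant in a GPR first.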
8252 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8253 isOnlyUsedByStores(Op1, DAG)) {
8254 SDValue Word = SDValue();
8255 EVT WordVT;
8256
8257 // Find a replicated immediate and return it if found in Word and its
8258 // type in WordVT.
8259 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8260 // Some constants are better handled with a scalar store.
8261 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8262 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8263 return;
8264
8265 APInt Val = C->getAPIntValue();
8266 // Truncate Val in case of a truncating store.
8267 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8268 assert(SN->isTruncatingStore() &&
8269 "Non-truncating store and immediate value does not fit?");
8270 Val = Val.trunc(TotBytes * 8);
8271 }
8272
8273 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8274 if (VCI.isVectorConstantLegal(Subtarget) &&
8275 VCI.Opcode == SystemZISD::REPLICATE) {
8276 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8277 WordVT = VCI.VecVT.getScalarType();
8278 }
8279 };
8280
8281 // Find a replicated register and return it if found in Word and its type
8282 // in WordVT.
8283 auto FindReplicatedReg = [&](SDValue MulOp) {
8284 EVT MulVT = MulOp.getValueType();
8285 if (MulOp->getOpcode() == ISD::MUL &&
8286 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8287 // Find a zero extended value and its type.
8288 SDValue LHS = MulOp->getOperand(0);
8289 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8290 WordVT = LHS->getOperand(0).getValueType();
8291 else if (LHS->getOpcode() == ISD::AssertZext)
8292 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8293 else
8294 return;
8295 // Find a replicating constant, e.g. 0x00010001.
8296 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8297 SystemZVectorConstantInfo VCI(
8298 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8299 if (VCI.isVectorConstantLegal(Subtarget) &&
8300 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8301 WordVT == VCI.VecVT.getScalarType())
8302 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8303 }
8304 }
8305 };
8306
8307 if (isa<BuildVectorSDNode>(Op1) &&
8308 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8309 SDValue SplatVal = Op1->getOperand(0);
8310 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8311 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8312 else
8313 FindReplicatedReg(SplatVal);
8314 } else {
8315 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8316 FindReplicatedImm(C, MemVT.getStoreSize());
8317 else
8318 FindReplicatedReg(Op1);
8319 }
8320
8321 if (Word != SDValue()) {
8322 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8323 "Bad type handling");
8324 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8325 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8326 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8327 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8328 SN->getBasePtr(), SN->getMemOperand());
8329 }
8330 }
8331
8332 return SDValue();
8333}
8334
8335SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8336 SDNode *N, DAGCombinerInfo &DCI) const {
8337 SelectionDAG &DAG = DCI.DAG;
8338 // Combine element-swap (LOAD) into VLER
8339 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8340 N->getOperand(0).hasOneUse() &&
8341 Subtarget.hasVectorEnhancements2()) {
8342 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8343 ArrayRef<int> ShuffleMask = SVN->getMask();
8344 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8345 SDValue Load = N->getOperand(0);
8346 LoadSDNode *LD = cast<LoadSDNode>(Load);
8347
8348 // Create the element-swapping load.
8349 SDValue Ops[] = {
8350 LD->getChain(), // Chain
8351 LD->getBasePtr() // Ptr
8352 };
8353 SDValue ESLoad =
8354 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8355 DAG.getVTList(LD->getValueType(0), MVT::Other),
8356 Ops, LD->getMemoryVT(), LD->getMemOperand());
8357
8358 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8359 // by the load dead.
8360 DCI.CombineTo(N, ESLoad);
8361
8362 // Next, combine the load away; we give it a bogus result value but a real
8363 // chain result. The result value is dead because the shuffle is dead.
8364 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8365
8366 // Return N so it doesn't get rechecked!
8367 return SDValue(N, 0);
8368 }
8369 }
8370
8371 return SDValue();
8372}
8373
8374SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8375 SDNode *N, DAGCombinerInfo &DCI) const {
8376 SelectionDAG &DAG = DCI.DAG;
8377
8378 if (!Subtarget.hasVector())
8379 return SDValue();
8380
8381 // Look through bitcasts that retain the number of vector elements.
8382 SDValue Op = N->getOperand(0);
8383 if (Op.getOpcode() == ISD::BITCAST &&
8384 Op.getValueType().isVector() &&
8385 Op.getOperand(0).getValueType().isVector() &&
8386 Op.getValueType().getVectorNumElements() ==
8387 Op.getOperand(0).getValueType().getVectorNumElements())
8388 Op = Op.getOperand(0);
8389
8390 // Pull BSWAP out of a vector extraction.
8391 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8392 EVT VecVT = Op.getValueType();
8393 EVT EltVT = VecVT.getVectorElementType();
8394 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8395 Op.getOperand(0), N->getOperand(1));
8396 DCI.AddToWorklist(Op.getNode());
8397 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8398 if (EltVT != N->getValueType(0)) {
8399 DCI.AddToWorklist(Op.getNode());
8400 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8401 }
8402 return Op;
8403 }
8404
8405 // Try to simplify a vector extraction.
8406 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8407 SDValue Op0 = N->getOperand(0);
8408 EVT VecVT = Op0.getValueType();
8409 if (canTreatAsByteVector(VecVT))
8410 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8411 IndexN->getZExtValue(), DCI, false);
8412 }
8413 return SDValue();
8414}
8415
8416SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8417 SDNode *N, DAGCombinerInfo &DCI) const {
8418 SelectionDAG &DAG = DCI.DAG;
8419 // (join_dwords X, X) == (replicate X)
8420 if (N->getOperand(0) == N->getOperand(1))
8421 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8422 N->getOperand(0));
8423 return SDValue();
8424}
8425
8426static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
8427 SDValue Chain1 = N1->getOperand(0);
8428 SDValue Chain2 = N2->getOperand(0);
8429
8430 // Trivial case: both nodes take the same chain.
8431 if (Chain1 == Chain2)
8432 return Chain1;
8433
8434 // FIXME - we could handle more complex cases via TokenFactor,
8435 // assuming we can verify that this would not create a cycle.
8436 return SDValue();
8437}
8438
8439SDValue SystemZTargetLowering::combineFP_ROUND(
8440 SDNode *N, DAGCombinerInfo &DCI) const {
8441
8442 if (!Subtarget.hasVector())
8443 return SDValue();
8444
8445 // (fpround (extract_vector_elt X 0))
8446 // (fpround (extract_vector_elt X 1)) ->
8447 // (extract_vector_elt (VROUND X) 0)
8448 // (extract_vector_elt (VROUND X) 2)
8449 //
8450 // This is a special case since the target doesn't really support v2f32s.
8451 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8452 SelectionDAG &DAG = DCI.DAG;
8453 SDValue Op0 = N->getOperand(OpNo);
8454 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8455 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8456 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8457 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8458 Op0.getConstantOperandVal(1) == 0) {
8459 SDValue Vec = Op0.getOperand(0);
8460 for (auto *U : Vec->users()) {
8461 if (U != Op0.getNode() && U->hasOneUse() &&
8462 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8463 U->getOperand(0) == Vec &&
8464 U->getOperand(1).getOpcode() == ISD::Constant &&
8465 U->getConstantOperandVal(1) == 1) {
8466 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8467 if (OtherRound.getOpcode() == N->getOpcode() &&
8468 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8469 OtherRound.getValueType() == MVT::f32) {
8470 SDValue VRound, Chain;
8471 if (N->isStrictFPOpcode()) {
8472 Chain = MergeInputChains(N, OtherRound.getNode());
8473 if (!Chain)
8474 continue;
8475 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8476 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8477 Chain = VRound.getValue(1);
8478 } else
8479 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8480 MVT::v4f32, Vec);
8481 DCI.AddToWorklist(VRound.getNode());
8482 SDValue Extract1 =
8483 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8484 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8485 DCI.AddToWorklist(Extract1.getNode());
8486 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8487 if (Chain)
8488 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8489 SDValue Extract0 =
8490 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8491 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8492 if (Chain)
8493 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8494 N->getVTList(), Extract0, Chain);
8495 return Extract0;
8496 }
8497 }
8498 }
8499 }
8500 return SDValue();
8501}
8502
8503SDValue SystemZTargetLowering::combineFP_EXTEND(
8504 SDNode *N, DAGCombinerInfo &DCI) const {
8505
8506 if (!Subtarget.hasVector())
8507 return SDValue();
8508
8509 // (fpextend (extract_vector_elt X 0))
8510 // (fpextend (extract_vector_elt X 2)) ->
8511 // (extract_vector_elt (VEXTEND X) 0)
8512 // (extract_vector_elt (VEXTEND X) 1)
8513 //
8514 // This is a special case since the target doesn't really support v2f32s.
8515 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8516 SelectionDAG &DAG = DCI.DAG;
8517 SDValue Op0 = N->getOperand(OpNo);
8518 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8519 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8520 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8521 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8522 Op0.getConstantOperandVal(1) == 0) {
8523 SDValue Vec = Op0.getOperand(0);
8524 for (auto *U : Vec->users()) {
8525 if (U != Op0.getNode() && U->hasOneUse() &&
8526 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8527 U->getOperand(0) == Vec &&
8528 U->getOperand(1).getOpcode() == ISD::Constant &&
8529 U->getConstantOperandVal(1) == 2) {
8530 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8531 if (OtherExtend.getOpcode() == N->getOpcode() &&
8532 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8533 OtherExtend.getValueType() == MVT::f64) {
8534 SDValue VExtend, Chain;
8535 if (N->isStrictFPOpcode()) {
8536 Chain = MergeInputChains(N, OtherExtend.getNode());
8537 if (!Chain)
8538 continue;
8539 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8540 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8541 Chain = VExtend.getValue(1);
8542 } else
8543 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8544 MVT::v2f64, Vec);
8545 DCI.AddToWorklist(VExtend.getNode());
8546 SDValue Extract1 =
8547 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8548 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8549 DCI.AddToWorklist(Extract1.getNode());
8550 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8551 if (Chain)
8552 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8553 SDValue Extract0 =
8554 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8555 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8556 if (Chain)
8557 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8558 N->getVTList(), Extract0, Chain);
8559 return Extract0;
8560 }
8561 }
8562 }
8563 }
8564 return SDValue();
8565}
8566
8567SDValue SystemZTargetLowering::combineINT_TO_FP(
8568 SDNode *N, DAGCombinerInfo &DCI) const {
8569 if (DCI.Level != BeforeLegalizeTypes)
8570 return SDValue();
8571 SelectionDAG &DAG = DCI.DAG;
8572 LLVMContext &Ctx = *DAG.getContext();
8573 unsigned Opcode = N->getOpcode();
8574 EVT OutVT = N->getValueType(0);
8575 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8576 SDValue Op = N->getOperand(0);
8577 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8578 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8579
8580 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8581 // v2f64 = uint_to_fp v2i16
8582 // =>
8583 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8584 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8585 OutScalarBits <= 64) {
8586 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8587 EVT ExtVT = EVT::getVectorVT(
8588 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8589 unsigned ExtOpcode =
8590 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
8591 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8592 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8593 }
8594 return SDValue();
8595}
8596
8597SDValue SystemZTargetLowering::combineFCOPYSIGN(
8598 SDNode *N, DAGCombinerInfo &DCI) const {
8599 SelectionDAG &DAG = DCI.DAG;
8600 EVT VT = N->getValueType(0);
8601 SDValue ValOp = N->getOperand(0);
8602 SDValue SignOp = N->getOperand(1);
8603
8604 // Remove the rounding which is not needed.
8605 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8606 SDValue WideOp = SignOp.getOperand(0);
8607 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8608 }
8609
8610 return SDValue();
8611}
8612
8613SDValue SystemZTargetLowering::combineBSWAP(
8614 SDNode *N, DAGCombinerInfo &DCI) const {
8615 SelectionDAG &DAG = DCI.DAG;
8616 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
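  // For example, (bswap (i32 load P)) becomes a single byte-reversed load;
  // for i16 the reversed value is produced as i32 and truncated back below.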
8617 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8618 N->getOperand(0).hasOneUse() &&
8619 canLoadStoreByteSwapped(N->getValueType(0))) {
8620 SDValue Load = N->getOperand(0);
8621 LoadSDNode *LD = cast<LoadSDNode>(Load);
8622
8623 // Create the byte-swapping load.
8624 SDValue Ops[] = {
8625 LD->getChain(), // Chain
8626 LD->getBasePtr() // Ptr
8627 };
8628 EVT LoadVT = N->getValueType(0);
8629 if (LoadVT == MVT::i16)
8630 LoadVT = MVT::i32;
8631 SDValue BSLoad =
8632 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8633 DAG.getVTList(LoadVT, MVT::Other),
8634 Ops, LD->getMemoryVT(), LD->getMemOperand());
8635
8636 // If this is an i16 load, insert the truncate.
8637 SDValue ResVal = BSLoad;
8638 if (N->getValueType(0) == MVT::i16)
8639 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8640
8641 // First, combine the bswap away. This makes the value produced by the
8642 // load dead.
8643 DCI.CombineTo(N, ResVal);
8644
9645 // Next, combine the load away; we give it a bogus result value but a real
8646 // chain result. The result value is dead because the bswap is dead.
8647 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8648
8649 // Return N so it doesn't get rechecked!
8650 return SDValue(N, 0);
8651 }
8652
8653 // Look through bitcasts that retain the number of vector elements.
8654 SDValue Op = N->getOperand(0);
8655 if (Op.getOpcode() == ISD::BITCAST &&
8656 Op.getValueType().isVector() &&
8657 Op.getOperand(0).getValueType().isVector() &&
8658 Op.getValueType().getVectorNumElements() ==
8659 Op.getOperand(0).getValueType().getVectorNumElements())
8660 Op = Op.getOperand(0);
8661
8662 // Push BSWAP into a vector insertion if at least one side then simplifies.
8663 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8664 SDValue Vec = Op.getOperand(0);
8665 SDValue Elt = Op.getOperand(1);
8666 SDValue Idx = Op.getOperand(2);
8667
8668 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
8669 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8670 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
8671 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8672 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8673 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8674 EVT VecVT = N->getValueType(0);
8675 EVT EltVT = N->getValueType(0).getVectorElementType();
8676 if (VecVT != Vec.getValueType()) {
8677 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8678 DCI.AddToWorklist(Vec.getNode());
8679 }
8680 if (EltVT != Elt.getValueType()) {
8681 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8682 DCI.AddToWorklist(Elt.getNode());
8683 }
8684 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8685 DCI.AddToWorklist(Vec.getNode());
8686 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8687 DCI.AddToWorklist(Elt.getNode());
8688 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8689 Vec, Elt, Idx);
8690 }
8691 }
8692
8693 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8694 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8695 if (SV && Op.hasOneUse()) {
8696 SDValue Op0 = Op.getOperand(0);
8697 SDValue Op1 = Op.getOperand(1);
8698
8699 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8700 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8701 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8702 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8703 EVT VecVT = N->getValueType(0);
8704 if (VecVT != Op0.getValueType()) {
8705 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8706 DCI.AddToWorklist(Op0.getNode());
8707 }
8708 if (VecVT != Op1.getValueType()) {
8709 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8710 DCI.AddToWorklist(Op1.getNode());
8711 }
8712 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8713 DCI.AddToWorklist(Op0.getNode());
8714 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8715 DCI.AddToWorklist(Op1.getNode());
8716 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8717 }
8718 }
8719
8720 return SDValue();
8721}
8722
8723SDValue SystemZTargetLowering::combineSETCC(
8724 SDNode *N, DAGCombinerInfo &DCI) const {
8725 SelectionDAG &DAG = DCI.DAG;
8726 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8727 const SDValue LHS = N->getOperand(0);
8728 const SDValue RHS = N->getOperand(1);
8729 bool CmpNull = isNullConstant(RHS);
8730 bool CmpAllOnes = isAllOnesConstant(RHS);
8731 EVT VT = N->getValueType(0);
8732 SDLoc DL(N);
8733
8734 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8735 // change the outer compare to an i128 compare. This will normally
8736 // allow the reduction to be recognized in adjustICmp128, and even if
8737 // not, the i128 compare will still generate better code.
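  // For example, an equality test of (bitcast (v16i1 setcc ...)) against
  // all-ones (an "all lanes true" reduction) is rewritten as an i128 compare
  // of the sign-extended mask vector against an all-ones i128 constant.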
8738 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8739 SDValue Src = peekThroughBitcasts(LHS);
8740 if (Src.getOpcode() == ISD::SETCC &&
8741 Src.getValueType().isFixedLengthVector() &&
8742 Src.getValueType().getScalarType() == MVT::i1) {
8743 EVT CmpVT = Src.getOperand(0).getValueType();
8744 if (CmpVT.getSizeInBits() == 128) {
8745 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8746 SDValue LHS =
8747 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8748 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8749 : DAG.getAllOnesConstant(DL, MVT::i128);
8750 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8751 N->getFlags());
8752 }
8753 }
8754 }
8755
8756 return SDValue();
8757}
8758
8759static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8760 switch (Val.getOpcode()) {
8761 default:
8762 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8763 case SystemZISD::IPM:
8764 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8766 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8767 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8768 case SystemZISD::SELECT_CCMASK: {
8769 SDValue Op4CCReg = Val.getOperand(4);
8770 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8771 Op4CCReg.getOpcode() == SystemZISD::TM) {
8772 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
8773 if (OpCC != SDValue())
8774 return std::make_pair(OpCC, OpCCValid);
8775 }
8776 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8777 if (!CCValid)
8778 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8779 int CCValidVal = CCValid->getZExtValue();
8780 return std::make_pair(Op4CCReg, CCValidVal);
8781 }
8782 case ISD::ADD:
8783 case ISD::AND:
8784 case ISD::OR:
8785 case ISD::XOR:
8786 case ISD::SHL:
8787 case ISD::SRA:
8788 case ISD::SRL:
8789 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
8790 if (Op0CC != SDValue())
8791 return std::make_pair(Op0CC, Op0CCValid);
8792 return findCCUse(Val.getOperand(1));
8793 }
8794}
8795
8796static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8797 SelectionDAG &DAG);
8798
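// Given a value Val that (possibly indirectly) depends on the condition code
// produced by CC, return the four SDValues that Val would evaluate to for
// CC = 0, 1, 2 and 3, or an empty vector if this cannot be determined. For
// example, (srl (ipm CC), SystemZ::IPM_CC) simplifies to the constants
// 0, 1, 2 and 3.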
8799static SmallVector<SDValue, 4> simplifyAssumingCCVal(SDValue Val, SDValue CC,
8800 SelectionDAG &DAG) {
8801 SDLoc DL(Val);
8802 auto Opcode = Val.getOpcode();
8803 switch (Opcode) {
8804 default:
8805 return {};
8806 case ISD::Constant:
8807 return {Val, Val, Val, Val};
8808 case SystemZISD::IPM: {
8809 SDValue IPMOp0 = Val.getOperand(0);
8810 if (IPMOp0 != CC)
8811 return {};
8812 SmallVector<SDValue, 4> ShiftedCCVals;
8813 for (auto CC : {0, 1, 2, 3})
8814 ShiftedCCVals.emplace_back(
8815 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8816 return ShiftedCCVals;
8817 }
8818 case SystemZISD::SELECT_CCMASK: {
8819 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8820 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8821 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8822 if (!CCValid || !CCMask)
8823 return {};
8824
8825 int CCValidVal = CCValid->getZExtValue();
8826 int CCMaskVal = CCMask->getZExtValue();
8827 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8828 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8829 if (TrueSDVals.empty() || FalseSDVals.empty())
8830 return {};
8831 SDValue Op4CCReg = Val.getOperand(4);
8832 if (Op4CCReg != CC)
8833 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8834 if (Op4CCReg != CC)
8835 return {};
8836 SmallVector<SDValue, 4> MergedSDVals;
8837 for (auto &CCVal : {0, 1, 2, 3})
8838 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8839 ? TrueSDVals[CCVal]
8840 : FalseSDVals[CCVal]);
8841 return MergedSDVals;
8842 }
8843 case ISD::ADD:
8844 case ISD::AND:
8845 case ISD::OR:
8846 case ISD::XOR:
8847 case ISD::SRA:
8848 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8849 // would clobber CC).
8850 if (!Val.hasOneUse())
8851 return {};
8852 [[fallthrough]];
8853 case ISD::SHL:
8854 case ISD::SRL:
8855 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8856 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8857 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8858 if (Op0SDVals.empty() || Op1SDVals.empty())
8859 return {};
8860 SmallVector<SDValue, 4> BinaryOpSDVals;
8861 for (auto CCVal : {0, 1, 2, 3})
8862 BinaryOpSDVals.emplace_back(DAG.getNode(
8863 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8864 return BinaryOpSDVals;
8865 }
8866}
8867
8868static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8869 SelectionDAG &DAG) {
8870 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8871 // set by the CCReg instruction using the CCValid / CCMask masks.
8872 // If the CCReg instruction is itself an ICMP / TM testing the condition
8873 // code set by some other instruction, see whether we can directly
8874 // use that condition code.
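  // For example, an ICMP comparing (srl (ipm CC), SystemZ::IPM_CC) against a
  // constant is evaluated for each possible CC value via simplifyAssumingCCVal,
  // and the outer CCMask is then recomputed so that CC can be used directly.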
8875 auto *CCNode = CCReg.getNode();
8876 if (!CCNode)
8877 return false;
8878
8879 if (CCNode->getOpcode() == SystemZISD::TM) {
8880 if (CCValid != SystemZ::CCMASK_TM)
8881 return false;
8882 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8883 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8884 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8885 if (!Op0Node || !Op1Node)
8886 return -1;
8887 auto Op0APVal = Op0Node->getAPIntValue();
8888 auto Op1APVal = Op1Node->getAPIntValue();
8889 auto Result = Op0APVal & Op1APVal;
8890 bool AllOnes = Result == Op1APVal;
8891 bool AllZeros = Result == 0;
8892 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8893 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8894 };
8895 SDValue Op0 = CCNode->getOperand(0);
8896 SDValue Op1 = CCNode->getOperand(1);
8897 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8898 if (Op0CC == SDValue())
8899 return false;
8900 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8901 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8902 if (Op0SDVals.empty() || Op1SDVals.empty())
8903 return false;
8904 int NewCCMask = 0;
8905 for (auto CC : {0, 1, 2, 3}) {
8906 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8907 if (CCVal < 0)
8908 return false;
8909 NewCCMask <<= 1;
8910 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8911 }
8912 NewCCMask &= Op0CCValid;
8913 CCReg = Op0CC;
8914 CCMask = NewCCMask;
8915 CCValid = Op0CCValid;
8916 return true;
8917 }
8918 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8919 CCValid != SystemZ::CCMASK_ICMP)
8920 return false;
8921
8922 SDValue CmpOp0 = CCNode->getOperand(0);
8923 SDValue CmpOp1 = CCNode->getOperand(1);
8924 SDValue CmpOp2 = CCNode->getOperand(2);
8925 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8926 if (Op0CC != SDValue()) {
8927 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8928 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8929 if (Op0SDVals.empty() || Op1SDVals.empty())
8930 return false;
8931
8932 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8933 auto CmpTypeVal = CmpType->getZExtValue();
8934 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8935 const SDValue &Op1Val) {
8936 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8937 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8938 if (!Op0Node || !Op1Node)
8939 return -1;
8940 auto Op0APVal = Op0Node->getAPIntValue();
8941 auto Op1APVal = Op1Node->getAPIntValue();
8942 if (CmpTypeVal == SystemZICMP::SignedOnly)
8943 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8944 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8945 };
8946 int NewCCMask = 0;
8947 for (auto CC : {0, 1, 2, 3}) {
8948 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8949 if (CCVal < 0)
8950 return false;
8951 NewCCMask <<= 1;
8952 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8953 }
8954 NewCCMask &= Op0CCValid;
8955 CCMask = NewCCMask;
8956 CCReg = Op0CC;
8957 CCValid = Op0CCValid;
8958 return true;
8959 }
8960
8961 return false;
8962}
8963
8964// Cost parameters for merging conditionals versus splitting them into multiple branches.
8965SystemZTargetLowering::CondMergingParams
8966SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
8967 const Value *Lhs,
8968 const Value *Rhs) const {
8969 const auto isFlagOutOpCC = [](const Value *V) {
8970 using namespace llvm::PatternMatch;
8971 const Value *RHSVal;
8972 const APInt *RHSC;
8973 if (const auto *I = dyn_cast<Instruction>(V)) {
8974 // PatternMatch.h provides concise tree-based pattern matching of LLVM IR.
8975 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8976 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8977 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8978 if (CB->isInlineAsm()) {
8979 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8980 return IA &&
8981 IA->getConstraintString().find("{@cc}") != std::string::npos;
8982 }
8983 }
8984 }
8985 }
8986 return false;
8987 };
8988 // The pattern is (ICmp %asm) or (ICmp (And %asm)).
8989 // The longest dependency chain (ICmp, And) has cost 2, so CostThreshold or
8990 // BaseCost can be set >= 2. If the cost of an instruction is <= CostThreshold,
8991 // the conditionals will be merged; otherwise they will be split.
8992 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8993 return {3, 0, -1};
8994 // Default.
8995 return {-1, -1, -1};
8996}
8997
8998SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8999 DAGCombinerInfo &DCI) const {
9000 SelectionDAG &DAG = DCI.DAG;
9001
9002 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
9003 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9004 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9005 if (!CCValid || !CCMask)
9006 return SDValue();
9007
9008 int CCValidVal = CCValid->getZExtValue();
9009 int CCMaskVal = CCMask->getZExtValue();
9010 SDValue Chain = N->getOperand(0);
9011 SDValue CCReg = N->getOperand(4);
9012 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
9013 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
9014 Chain,
9015 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9016 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
9017 N->getOperand(3), CCReg);
9018 return SDValue();
9019}
9020
9021SDValue SystemZTargetLowering::combineSELECT_CCMASK(
9022 SDNode *N, DAGCombinerInfo &DCI) const {
9023 SelectionDAG &DAG = DCI.DAG;
9024
9025 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
9026 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
9027 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
9028 if (!CCValid || !CCMask)
9029 return SDValue();
9030
9031 int CCValidVal = CCValid->getZExtValue();
9032 int CCMaskVal = CCMask->getZExtValue();
9033 SDValue CCReg = N->getOperand(4);
9034
9035 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
9036
9037 // Populate a vector of SDValues, one per condition code value, for the given
9038 // Val, which may itself be another nested select_ccmask on the same CC.
9039 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
9040 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
9041 SmallVector<SDValue, 4> Res;
9042 if (Val.getOperand(4) != CCReg)
9043 return SmallVector<SDValue, 4>{};
9044 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
9045 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
9046 if (!CCMask)
9047 return SmallVector<SDValue, 4>{};
9048
9049 int CCMaskVal = CCMask->getZExtValue();
9050 for (auto &CC : {0, 1, 2, 3})
9051 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
9052 : FalseVal);
9053 return Res;
9054 }
9055 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
9056 };
9057 // Attempt to optimize TrueVal/FalseVal of the outermost select_ccmask, either
9058 // with the CCReg found by combineCCMask or with the original CCReg.
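  // If every CC value permitted by CCValid selects the same SDValue, the
  // SELECT_CCMASK folds to that value; if only two distinct values remain,
  // a new CCMask is computed for them below.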
9059 SDValue TrueVal = N->getOperand(0);
9060 SDValue FalseVal = N->getOperand(1);
9061 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
9062 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
9063 // TrueSDVals/FalseSDVals might be empty in case of non-constant
9064 // TrueVal/FalseVal for select_ccmask, which cannot be optimized further.
9065 if (TrueSDVals.empty())
9066 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
9067 if (FalseSDVals.empty())
9068 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
9069 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
9070 SmallSet<SDValue, 4> MergedSDValsSet;
9071 // Ignore CC values outside CCValid.
9072 for (auto CC : {0, 1, 2, 3}) {
9073 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
9074 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
9075 ? TrueSDVals[CC]
9076 : FalseSDVals[CC]);
9077 }
9078 if (MergedSDValsSet.size() == 1)
9079 return *MergedSDValsSet.begin();
9080 if (MergedSDValsSet.size() == 2) {
9081 auto BeginIt = MergedSDValsSet.begin();
9082 SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
9083 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
9084 std::swap(NewTrueVal, NewFalseVal);
9085 int NewCCMask = 0;
9086 for (auto CC : {0, 1, 2, 3}) {
9087 NewCCMask <<= 1;
9088 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
9089 ? (TrueSDVals[CC] == NewTrueVal)
9090 : (FalseSDVals[CC] == NewTrueVal);
9091 }
9092 CCMaskVal = NewCCMask;
9093 CCMaskVal &= CCValidVal;
9094 TrueVal = NewTrueVal;
9095 FalseVal = NewFalseVal;
9096 IsCombinedCCReg = true;
9097 }
9098 }
9099
9100 if (IsCombinedCCReg)
9101 return DAG.getNode(
9102 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
9103 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9104 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
9105
9106 return SDValue();
9107}
9108
9109SDValue SystemZTargetLowering::combineGET_CCMASK(
9110 SDNode *N, DAGCombinerInfo &DCI) const {
9111
9112 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
9113 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9114 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9115 if (!CCValid || !CCMask)
9116 return SDValue();
9117 int CCValidVal = CCValid->getZExtValue();
9118 int CCMaskVal = CCMask->getZExtValue();
9119
9120 SDValue Select = N->getOperand(0);
9121 if (Select->getOpcode() == ISD::TRUNCATE)
9122 Select = Select->getOperand(0);
9123 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9124 return SDValue();
9125
9126 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9127 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9128 if (!SelectCCValid || !SelectCCMask)
9129 return SDValue();
9130 int SelectCCValidVal = SelectCCValid->getZExtValue();
9131 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9132
9133 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9134 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9135 if (!TrueVal || !FalseVal)
9136 return SDValue();
9137 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9138 ;
9139 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9140 SelectCCMaskVal ^= SelectCCValidVal;
9141 else
9142 return SDValue();
9143
9144 if (SelectCCValidVal & ~CCValidVal)
9145 return SDValue();
9146 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9147 return SDValue();
9148
9149 return Select->getOperand(4);
9150}
9151
9152SDValue SystemZTargetLowering::combineIntDIVREM(
9153 SDNode *N, DAGCombinerInfo &DCI) const {
9154 SelectionDAG &DAG = DCI.DAG;
9155 EVT VT = N->getValueType(0);
9156 // In the case where the divisor is a vector of constants a cheaper
9157 // sequence of instructions can replace the divide. BuildSDIV is called to
9158 // do this during DAG combining, but it only succeeds when it can build a
9159 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9160 // since it is not Legal but Custom it can only happen before
9161 // legalization. Therefore we must scalarize this early, before the first
9162 // DAG combine. For widened vectors, this is already the result of type legalization.
9163 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9164 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9165 return DAG.UnrollVectorOp(N);
9166 return SDValue();
9167}
9168
9169
9170// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9171// This is closely modeled after the common-code combineShiftToMULH.
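// Illustrative (signed, v4i32 element) case:
//   (sra (add (mul (sext X), (sext Y)), (sext A)), 32)
//   ==> (sext (VMAH X, Y, A))
// where X, Y and A are v4i32 and the multiply-and-add is performed in v4i64.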
9172SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9173 SDNode *N, DAGCombinerInfo &DCI) const {
9174 SelectionDAG &DAG = DCI.DAG;
9175 SDLoc DL(N);
9176
9177 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9178 "SRL or SRA node is required here!");
9179
9180 if (!Subtarget.hasVector())
9181 return SDValue();
9182
9183 // Check the shift amount. Proceed with the transformation if the shift
9184 // amount is constant.
9185 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9186 if (!ShiftAmtSrc)
9187 return SDValue();
9188
9189 // The operation feeding into the shift must be an add.
9190 SDValue ShiftOperand = N->getOperand(0);
9191 if (ShiftOperand.getOpcode() != ISD::ADD)
9192 return SDValue();
9193
9194 // One operand of the add must be a multiply.
9195 SDValue MulOp = ShiftOperand.getOperand(0);
9196 SDValue AddOp = ShiftOperand.getOperand(1);
9197 if (MulOp.getOpcode() != ISD::MUL) {
9198 if (AddOp.getOpcode() != ISD::MUL)
9199 return SDValue();
9200 std::swap(MulOp, AddOp);
9201 }
9202
9203 // All operands must be equivalent extend nodes.
9204 SDValue LeftOp = MulOp.getOperand(0);
9205 SDValue RightOp = MulOp.getOperand(1);
9206
9207 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9208 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9209
9210 if (!IsSignExt && !IsZeroExt)
9211 return SDValue();
9212
9213 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9214 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9215
9216 SDValue MulhRightOp;
9217 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9218 unsigned ActiveBits = IsSignExt
9219 ? Constant->getAPIntValue().getSignificantBits()
9220 : Constant->getAPIntValue().getActiveBits();
9221 if (ActiveBits > NarrowVTSize)
9222 return SDValue();
9223 MulhRightOp = DAG.getConstant(
9224 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9225 NarrowVT);
9226 } else {
9227 if (LeftOp.getOpcode() != RightOp.getOpcode())
9228 return SDValue();
9229 // Check that the two extend nodes are the same type.
9230 if (NarrowVT != RightOp.getOperand(0).getValueType())
9231 return SDValue();
9232 MulhRightOp = RightOp.getOperand(0);
9233 }
9234
9235 SDValue MulhAddOp;
9236 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9237 unsigned ActiveBits = IsSignExt
9238 ? Constant->getAPIntValue().getSignificantBits()
9239 : Constant->getAPIntValue().getActiveBits();
9240 if (ActiveBits > NarrowVTSize)
9241 return SDValue();
9242 MulhAddOp = DAG.getConstant(
9243 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9244 NarrowVT);
9245 } else {
9246 if (LeftOp.getOpcode() != AddOp.getOpcode())
9247 return SDValue();
9248 // Check that the two extend nodes are the same type.
9249 if (NarrowVT != AddOp.getOperand(0).getValueType())
9250 return SDValue();
9251 MulhAddOp = AddOp.getOperand(0);
9252 }
9253
9254 EVT WideVT = LeftOp.getValueType();
9255 // Proceed with the transformation if the wide types match.
9256 assert((WideVT == RightOp.getValueType()) &&
9257 "Cannot have a multiply node with two different operand types.");
9258 assert((WideVT == AddOp.getValueType()) &&
9259 "Cannot have an add node with two different operand types.");
9260
9261 // Proceed with the transformation if the wide type is twice as large
9262 // as the narrow type.
9263 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9264 return SDValue();
9265
9266 // Check the shift amount with the narrow type size.
9267 // Proceed with the transformation if the shift amount is the width
9268 // of the narrow type.
9269 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9270 if (ShiftAmt != NarrowVTSize)
9271 return SDValue();
9272
9273 // Proceed if we support the multiply-and-add-high operation.
9274 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9275 NarrowVT == MVT::v4i32 ||
9276 (Subtarget.hasVectorEnhancements3() &&
9277 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9278 return SDValue();
9279
9280 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9281 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9282 DL, NarrowVT, LeftOp.getOperand(0),
9283 MulhRightOp, MulhAddOp);
9284 bool IsSigned = N->getOpcode() == ISD::SRA;
9285 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9286}
9287
9288// Op is an operand of a multiplication. Check whether this can be folded
9289// into an even/odd widening operation; if so, return the opcode to be used
9290// and update Op to the appropriate sub-operand. Note that the caller must
9291// verify that *both* operands of the multiplication support the operation.
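// For example, in
//   (mul (sign_extend_vector_inreg (vector_shuffle <0,2,...> X, undef)),
//        (sign_extend_vector_inreg (vector_shuffle <0,2,...> Y, undef)))
// both operands select the even elements, so the multiply can be emitted as
// VME X, Y (or VMLE for the zero-extended form).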
9292static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
9293 const SystemZSubtarget &Subtarget,
9294 SDValue &Op) {
9295 EVT VT = Op.getValueType();
9296
9297 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9298 // to selecting the even or odd vector elements.
9299 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9300 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9301 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9302 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9303 unsigned NumElts = VT.getVectorNumElements();
9304 Op = Op.getOperand(0);
9305 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9306 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9307 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9308 ArrayRef<int> ShuffleMask = SVN->getMask();
9309 bool CanUseEven = true, CanUseOdd = true;
9310 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9311 if (ShuffleMask[Elt] == -1)
9312 continue;
9313 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9314 CanUseEven = false;
9315 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9316 CanUseOdd = false;
9317 }
9318 Op = Op.getOperand(0);
9319 if (CanUseEven)
9320 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9321 if (CanUseOdd)
9322 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9323 }
9324 }
9325
9326 // For z17, we can also support the v2i64->i128 case, which looks like
9327 // (sign/zero_extend (extract_vector_elt X 0/1))
9328 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9329 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9330 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9331 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9332 Op = Op.getOperand(0);
9333 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9334 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9335 Op.getOperand(1).getOpcode() == ISD::Constant) {
9336 unsigned Elem = Op.getConstantOperandVal(1);
9337 Op = Op.getOperand(0);
9338 if (Elem == 0)
9339 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9340 if (Elem == 1)
9341 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9342 }
9343 }
9344
9345 return 0;
9346}
9347
9348SDValue SystemZTargetLowering::combineMUL(
9349 SDNode *N, DAGCombinerInfo &DCI) const {
9350 SelectionDAG &DAG = DCI.DAG;
9351
9352 // Detect even/odd widening multiplication.
9353 SDValue Op0 = N->getOperand(0);
9354 SDValue Op1 = N->getOperand(1);
9355 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9356 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9357 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9358 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9359
9360 return SDValue();
9361}
9362
9363SDValue SystemZTargetLowering::combineINTRINSIC(
9364 SDNode *N, DAGCombinerInfo &DCI) const {
9365 SelectionDAG &DAG = DCI.DAG;
9366
9367 unsigned Id = N->getConstantOperandVal(1);
9368 switch (Id) {
9369 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9370 // or larger is simply a vector load.
9371 case Intrinsic::s390_vll:
9372 case Intrinsic::s390_vlrl:
9373 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9374 if (C->getZExtValue() >= 15)
9375 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9376 N->getOperand(3), MachinePointerInfo());
9377 break;
9378 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9379 case Intrinsic::s390_vstl:
9380 case Intrinsic::s390_vstrl:
9381 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9382 if (C->getZExtValue() >= 15)
9383 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9384 N->getOperand(4), MachinePointerInfo());
9385 break;
9386 }
9387
9388 return SDValue();
9389}
9390
9391SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9392 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9393 return N->getOperand(0);
9394 return N;
9395}
9396
9397SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
9398 DAGCombinerInfo &DCI) const {
9399 switch(N->getOpcode()) {
9400 default: break;
9401 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9402 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9403 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9404 case SystemZISD::MERGE_HIGH:
9405 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9406 case ISD::LOAD: return combineLOAD(N, DCI);
9407 case ISD::STORE: return combineSTORE(N, DCI);
9408 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9409 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9410 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9411 case ISD::STRICT_FP_ROUND:
9412 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9413 case ISD::STRICT_FP_EXTEND:
9414 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9415 case ISD::SINT_TO_FP:
9416 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9417 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9418 case ISD::BSWAP: return combineBSWAP(N, DCI);
9419 case ISD::SETCC: return combineSETCC(N, DCI);
9420 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9421 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9422 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9423 case ISD::SRL:
9424 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9425 case ISD::MUL: return combineMUL(N, DCI);
9426 case ISD::SDIV:
9427 case ISD::UDIV:
9428 case ISD::SREM:
9429 case ISD::UREM: return combineIntDIVREM(N, DCI);
9430 case ISD::INTRINSIC_W_CHAIN:
9431 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9432 }
9433
9434 return SDValue();
9435}
9436
9437// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9438// are for Op.
9439static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9440 unsigned OpNo) {
9441 EVT VT = Op.getValueType();
9442 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9443 APInt SrcDemE;
9444 unsigned Opcode = Op.getOpcode();
9445 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9446 unsigned Id = Op.getConstantOperandVal(0);
9447 switch (Id) {
9448 case Intrinsic::s390_vpksh: // PACKS
9449 case Intrinsic::s390_vpksf:
9450 case Intrinsic::s390_vpksg:
9451 case Intrinsic::s390_vpkshs: // PACKS_CC
9452 case Intrinsic::s390_vpksfs:
9453 case Intrinsic::s390_vpksgs:
9454 case Intrinsic::s390_vpklsh: // PACKLS
9455 case Intrinsic::s390_vpklsf:
9456 case Intrinsic::s390_vpklsg:
9457 case Intrinsic::s390_vpklshs: // PACKLS_CC
9458 case Intrinsic::s390_vpklsfs:
9459 case Intrinsic::s390_vpklsgs:
9460 // VECTOR PACK truncates the elements of two source vectors into one.
9461 SrcDemE = DemandedElts;
9462 if (OpNo == 2)
9463 SrcDemE.lshrInPlace(NumElts / 2);
9464 SrcDemE = SrcDemE.trunc(NumElts / 2);
9465 break;
9466 // VECTOR UNPACK extends half the elements of the source vector.
9467 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9468 case Intrinsic::s390_vuphh:
9469 case Intrinsic::s390_vuphf:
9470 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9471 case Intrinsic::s390_vuplhh:
9472 case Intrinsic::s390_vuplhf:
9473 SrcDemE = APInt(NumElts * 2, 0);
9474 SrcDemE.insertBits(DemandedElts, 0);
9475 break;
9476 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9477 case Intrinsic::s390_vuplhw:
9478 case Intrinsic::s390_vuplf:
9479 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9480 case Intrinsic::s390_vupllh:
9481 case Intrinsic::s390_vupllf:
9482 SrcDemE = APInt(NumElts * 2, 0);
9483 SrcDemE.insertBits(DemandedElts, NumElts);
9484 break;
9485 case Intrinsic::s390_vpdi: {
9486 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9487 SrcDemE = APInt(NumElts, 0);
9488 if (!DemandedElts[OpNo - 1])
9489 break;
9490 unsigned Mask = Op.getConstantOperandVal(3);
9491 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9492 // Demand input element 0 or 1, given by the mask bit value.
9493 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9494 break;
9495 }
9496 case Intrinsic::s390_vsldb: {
9497 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9498 assert(VT == MVT::v16i8 && "Unexpected type.");
9499 unsigned FirstIdx = Op.getConstantOperandVal(3);
9500 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9501 unsigned NumSrc0Els = 16 - FirstIdx;
9502 SrcDemE = APInt(NumElts, 0);
9503 if (OpNo == 1) {
9504 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9505 SrcDemE.insertBits(DemEls, FirstIdx);
9506 } else {
9507 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9508 SrcDemE.insertBits(DemEls, 0);
9509 }
9510 break;
9511 }
9512 case Intrinsic::s390_vperm:
9513 SrcDemE = APInt::getAllOnes(NumElts);
9514 break;
9515 default:
9516 llvm_unreachable("Unhandled intrinsic.");
9517 break;
9518 }
9519 } else {
9520 switch (Opcode) {
9521 case SystemZISD::JOIN_DWORDS:
9522 // Scalar operand.
9523 SrcDemE = APInt(1, 1);
9524 break;
9525 case SystemZISD::SELECT_CCMASK:
9526 SrcDemE = DemandedElts;
9527 break;
9528 default:
9529 llvm_unreachable("Unhandled opcode.");
9530 break;
9531 }
9532 }
9533 return SrcDemE;
9534}
9535
9536static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9537 const APInt &DemandedElts,
9538 const SelectionDAG &DAG, unsigned Depth,
9539 unsigned OpNo) {
9540 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9541 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9542 KnownBits LHSKnown =
9543 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9544 KnownBits RHSKnown =
9545 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9546 Known = LHSKnown.intersectWith(RHSKnown);
9547}
9548
9549void
9550SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
9551 KnownBits &Known,
9552 const APInt &DemandedElts,
9553 const SelectionDAG &DAG,
9554 unsigned Depth) const {
9555 Known.resetAll();
9556
9557 // Intrinsic CC result is returned in the two low bits.
9558 unsigned Tmp0, Tmp1; // not used
9559 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9560 Known.Zero.setBitsFrom(2);
9561 return;
9562 }
9563 EVT VT = Op.getValueType();
9564 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9565 return;
9566 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9567 "KnownBits does not match VT in bitwidth");
9568 assert ((!VT.isVector() ||
9569 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9570 "DemandedElts does not match VT number of elements");
9571 unsigned BitWidth = Known.getBitWidth();
9572 unsigned Opcode = Op.getOpcode();
9573 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9574 bool IsLogical = false;
9575 unsigned Id = Op.getConstantOperandVal(0);
9576 switch (Id) {
9577 case Intrinsic::s390_vpksh: // PACKS
9578 case Intrinsic::s390_vpksf:
9579 case Intrinsic::s390_vpksg:
9580 case Intrinsic::s390_vpkshs: // PACKS_CC
9581 case Intrinsic::s390_vpksfs:
9582 case Intrinsic::s390_vpksgs:
9583 case Intrinsic::s390_vpklsh: // PACKLS
9584 case Intrinsic::s390_vpklsf:
9585 case Intrinsic::s390_vpklsg:
9586 case Intrinsic::s390_vpklshs: // PACKLS_CC
9587 case Intrinsic::s390_vpklsfs:
9588 case Intrinsic::s390_vpklsgs:
9589 case Intrinsic::s390_vpdi:
9590 case Intrinsic::s390_vsldb:
9591 case Intrinsic::s390_vperm:
9592 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9593 break;
9594 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9595 case Intrinsic::s390_vuplhh:
9596 case Intrinsic::s390_vuplhf:
9597 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9598 case Intrinsic::s390_vupllh:
9599 case Intrinsic::s390_vupllf:
9600 IsLogical = true;
9601 [[fallthrough]];
9602 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9603 case Intrinsic::s390_vuphh:
9604 case Intrinsic::s390_vuphf:
9605 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9606 case Intrinsic::s390_vuplhw:
9607 case Intrinsic::s390_vuplf: {
9608 SDValue SrcOp = Op.getOperand(1);
9609 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9610 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9611 if (IsLogical) {
9612 Known = Known.zext(BitWidth);
9613 } else
9614 Known = Known.sext(BitWidth);
9615 break;
9616 }
9617 default:
9618 break;
9619 }
9620 } else {
9621 switch (Opcode) {
9622 case SystemZISD::JOIN_DWORDS:
9623 case SystemZISD::SELECT_CCMASK:
9624 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9625 break;
9626 case SystemZISD::REPLICATE: {
9627 SDValue SrcOp = Op.getOperand(0);
9628 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9629 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
9630 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9631 break;
9632 }
9633 default:
9634 break;
9635 }
9636 }
9637
9638 // Known has the width of the source operand(s). Adjust if needed to match
9639 // the passed bitwidth.
9640 if (Known.getBitWidth() != BitWidth)
9641 Known = Known.anyextOrTrunc(BitWidth);
9642}
9643
9644static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9645 const SelectionDAG &DAG, unsigned Depth,
9646 unsigned OpNo) {
9647 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9648 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9649 if (LHS == 1) return 1; // Early out.
9650 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9651 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9652 if (RHS == 1) return 1; // Early out.
9653 unsigned Common = std::min(LHS, RHS);
9654 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9655 EVT VT = Op.getValueType();
9656 unsigned VTBits = VT.getScalarSizeInBits();
9657 if (SrcBitWidth > VTBits) { // PACK
9658 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9659 if (Common > SrcExtraBits)
9660 return (Common - SrcExtraBits);
9661 return 1;
9662 }
9663 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9664 return Common;
9665}
9666
9667unsigned
9668SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
9669 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9670 unsigned Depth) const {
9671 if (Op.getResNo() != 0)
9672 return 1;
9673 unsigned Opcode = Op.getOpcode();
9674 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9675 unsigned Id = Op.getConstantOperandVal(0);
9676 switch (Id) {
9677 case Intrinsic::s390_vpksh: // PACKS
9678 case Intrinsic::s390_vpksf:
9679 case Intrinsic::s390_vpksg:
9680 case Intrinsic::s390_vpkshs: // PACKS_CC
9681 case Intrinsic::s390_vpksfs:
9682 case Intrinsic::s390_vpksgs:
9683 case Intrinsic::s390_vpklsh: // PACKLS
9684 case Intrinsic::s390_vpklsf:
9685 case Intrinsic::s390_vpklsg:
9686 case Intrinsic::s390_vpklshs: // PACKLS_CC
9687 case Intrinsic::s390_vpklsfs:
9688 case Intrinsic::s390_vpklsgs:
9689 case Intrinsic::s390_vpdi:
9690 case Intrinsic::s390_vsldb:
9691 case Intrinsic::s390_vperm:
9692 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9693 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9694 case Intrinsic::s390_vuphh:
9695 case Intrinsic::s390_vuphf:
9696 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9697 case Intrinsic::s390_vuplhw:
9698 case Intrinsic::s390_vuplf: {
9699 SDValue PackedOp = Op.getOperand(1);
9700 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9701 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9702 EVT VT = Op.getValueType();
9703 unsigned VTBits = VT.getScalarSizeInBits();
9704 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9705 return Tmp;
9706 }
9707 default:
9708 break;
9709 }
9710 } else {
9711 switch (Opcode) {
9712 case SystemZISD::SELECT_CCMASK:
9713 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9714 default:
9715 break;
9716 }
9717 }
9718
9719 return 1;
9720}
9721
9722bool SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
9723 SDValue Op,
9724 const APInt &DemandedElts, const SelectionDAG &DAG,
9725 bool PoisonOnly, unsigned Depth) const {
9726 switch (Op->getOpcode()) {
9727 case SystemZISD::PCREL_WRAPPER:
9728 case SystemZISD::PCREL_OFFSET:
9729 return true;
9730 }
9731 return false;
9732}
9733
9734unsigned
9735SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
9736 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9737 unsigned StackAlign = TFI->getStackAlignment();
9738 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9739 "Unexpected stack alignment");
9740 // The default stack probe size is 4096 if the function has no
9741 // stack-probe-size attribute.
9742 unsigned StackProbeSize =
9743 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9744 // Round down to the stack alignment.
9745 StackProbeSize &= ~(StackAlign - 1);
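  // For example, with an 8-byte stack alignment a "stack-probe-size" of 4100
  // is rounded down to 4096.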
9746 return StackProbeSize ? StackProbeSize : StackAlign;
9747}
9748
9749//===----------------------------------------------------------------------===//
9750// Custom insertion
9751//===----------------------------------------------------------------------===//
9752
9753// Force base value Base into a register before MI. Return the register.
9754static Register forceReg(MachineInstr &MI, MachineOperand &Base,
9755 const SystemZInstrInfo *TII) {
9756 MachineBasicBlock *MBB = MI.getParent();
9757 MachineFunction &MF = *MBB->getParent();
9758  MachineRegisterInfo &MRI = MF.getRegInfo();
9759
9760 if (Base.isReg()) {
9761 // Copy Base into a new virtual register to help register coalescing in
9762 // cases with multiple uses.
9763 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9764 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9765 .add(Base);
9766 return Reg;
9767 }
9768
9769 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9770 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9771 .add(Base)
9772 .addImm(0)
9773 .addReg(0);
9774 return Reg;
9775}
9776
9777// The CC operand of MI might be missing a kill marker because there
9778// were multiple uses of CC, and ISel didn't know which to mark.
9779// Figure out whether MI should have had a kill marker.
9780static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
9781  // Scan forward through BB for a use/def of CC.
9782  MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
9783  for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9784 const MachineInstr &MI = *miI;
9785 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9786 return false;
9787 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9788 break; // Should have kill-flag - update below.
9789 }
9790
9791 // If we hit the end of the block, check whether CC is live into a
9792 // successor.
9793 if (miI == MBB->end()) {
9794 for (const MachineBasicBlock *Succ : MBB->successors())
9795 if (Succ->isLiveIn(SystemZ::CC))
9796 return false;
9797 }
9798
9799 return true;
9800}
9801
9802// Return true if it is OK for this Select pseudo-opcode to be cascaded
9803// together with other Select pseudo-opcodes into a single basic-block with
9804// a conditional jump around it.
9805static bool isSelectPseudo(MachineInstr &MI) {
9806  switch (MI.getOpcode()) {
9807 case SystemZ::Select32:
9808 case SystemZ::Select64:
9809 case SystemZ::Select128:
9810 case SystemZ::SelectF32:
9811 case SystemZ::SelectF64:
9812 case SystemZ::SelectF128:
9813 case SystemZ::SelectVR32:
9814 case SystemZ::SelectVR64:
9815 case SystemZ::SelectVR128:
9816 return true;
9817
9818 default:
9819 return false;
9820 }
9821}
9822
9823// Helper function, which inserts PHI functions into SinkMBB:
9824// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9825// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9826static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
9827                                 MachineBasicBlock *TrueMBB,
9828 MachineBasicBlock *FalseMBB,
9829 MachineBasicBlock *SinkMBB) {
9830 MachineFunction *MF = TrueMBB->getParent();
9831  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
9832
9833 MachineInstr *FirstMI = Selects.front();
9834 unsigned CCValid = FirstMI->getOperand(3).getImm();
9835 unsigned CCMask = FirstMI->getOperand(4).getImm();
9836
9837 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9838
9839 // As we are creating the PHIs, we have to be careful if there is more than
9840 // one. Later Selects may reference the results of earlier Selects, but later
9841 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9842 // That also means that PHI construction must work forward from earlier to
9843// later, and that the code must maintain a mapping from earlier PHIs'
9844// destination registers to the registers that went into the PHI.
9845  DenseMap<Register, std::pair<Register, Register>> RegRewriteTable;
9846
9847 for (auto *MI : Selects) {
9848 Register DestReg = MI->getOperand(0).getReg();
9849 Register TrueReg = MI->getOperand(1).getReg();
9850 Register FalseReg = MI->getOperand(2).getReg();
9851
9852 // If this Select we are generating is the opposite condition from
9853 // the jump we generated, then we have to swap the operands for the
9854 // PHI that is going to be generated.
9855 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9856 std::swap(TrueReg, FalseReg);
9857
9858 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9859 TrueReg = It->second.first;
9860
9861 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9862 FalseReg = It->second.second;
9863
9864 DebugLoc DL = MI->getDebugLoc();
9865 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9866 .addReg(TrueReg).addMBB(TrueMBB)
9867 .addReg(FalseReg).addMBB(FalseMBB);
9868
9869 // Add this PHI to the rewrite table.
9870 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9871 }
9872
9873 MF->getProperties().resetNoPHIs();
9874}
9875
9876MachineBasicBlock *
9877SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9878 MachineBasicBlock *BB) const {
9879 MachineFunction &MF = *BB->getParent();
9880 MachineFrameInfo &MFI = MF.getFrameInfo();
9881 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9882 assert(TFL->hasReservedCallFrame(MF) &&
9883 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9884 (void)TFL;
9885 // Get the MaxCallFrameSize value and erase MI since it serves no further
9886 // purpose as the call frame is statically reserved in the prolog. Set
9887 // AdjustsStack as MI is *not* mapped as a frame instruction.
9888 uint32_t NumBytes = MI.getOperand(0).getImm();
9889 if (NumBytes > MFI.getMaxCallFrameSize())
9890 MFI.setMaxCallFrameSize(NumBytes);
9891 MFI.setAdjustsStack(true);
9892
9893 MI.eraseFromParent();
9894 return BB;
9895}
9896
9897// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9898MachineBasicBlock *
9899SystemZTargetLowering::emitSelect(MachineInstr &MI,
9900 MachineBasicBlock *MBB) const {
9901 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9902 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9903
9904 unsigned CCValid = MI.getOperand(3).getImm();
9905 unsigned CCMask = MI.getOperand(4).getImm();
9906
9907 // If we have a sequence of Select* pseudo instructions using the
9908 // same condition code value, we want to expand all of them into
9909 // a single pair of basic blocks using the same condition.
9910 SmallVector<MachineInstr*, 8> Selects;
9911 SmallVector<MachineInstr*, 8> DbgValues;
9912 Selects.push_back(&MI);
9913 unsigned Count = 0;
9914 for (MachineInstr &NextMI : llvm::make_range(
9915 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9916 if (isSelectPseudo(NextMI)) {
9917 assert(NextMI.getOperand(3).getImm() == CCValid &&
9918 "Bad CCValid operands since CC was not redefined.");
9919 if (NextMI.getOperand(4).getImm() == CCMask ||
9920 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9921 Selects.push_back(&NextMI);
9922 continue;
9923 }
9924 break;
9925 }
9926 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9927 NextMI.usesCustomInsertionHook())
9928 break;
9929 bool User = false;
9930 for (auto *SelMI : Selects)
9931 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9932 User = true;
9933 break;
9934 }
9935 if (NextMI.isDebugInstr()) {
9936 if (User) {
9937 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9938 DbgValues.push_back(&NextMI);
9939 }
9940 } else if (User || ++Count > 20)
9941 break;
9942 }
9943
9944 MachineInstr *LastMI = Selects.back();
9945 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9946 checkCCKill(*LastMI, MBB));
9947 MachineBasicBlock *StartMBB = MBB;
9948 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9949 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9950
9951 // Unless CC was killed in the last Select instruction, mark it as
9952 // live-in to both FalseMBB and JoinMBB.
9953 if (!CCKilled) {
9954 FalseMBB->addLiveIn(SystemZ::CC);
9955 JoinMBB->addLiveIn(SystemZ::CC);
9956 }
9957
9958 // StartMBB:
9959 // BRC CCMask, JoinMBB
9960 // # fallthrough to FalseMBB
9961 MBB = StartMBB;
9962 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9963 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9964 MBB->addSuccessor(JoinMBB);
9965 MBB->addSuccessor(FalseMBB);
9966
9967 // FalseMBB:
9968 // # fallthrough to JoinMBB
9969 MBB = FalseMBB;
9970 MBB->addSuccessor(JoinMBB);
9971
9972 // JoinMBB:
9973 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9974 // ...
9975 MBB = JoinMBB;
9976 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9977 for (auto *SelMI : Selects)
9978 SelMI->eraseFromParent();
9979
9980  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9981  for (auto *DbgMI : DbgValues)
9982 MBB->splice(InsertPos, StartMBB, DbgMI);
9983
9984 return JoinMBB;
9985}
9986
9987// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9988// StoreOpcode is the store to use and Invert says whether the store should
9989// happen when the condition is false rather than true. If a STORE ON
9990// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9991MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9992                                                         MachineBasicBlock *MBB,
9993                                                         unsigned StoreOpcode,
9994 unsigned STOCOpcode,
9995 bool Invert) const {
9996 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9997
9998 Register SrcReg = MI.getOperand(0).getReg();
9999 MachineOperand Base = MI.getOperand(1);
10000 int64_t Disp = MI.getOperand(2).getImm();
10001 Register IndexReg = MI.getOperand(3).getReg();
10002 unsigned CCValid = MI.getOperand(4).getImm();
10003 unsigned CCMask = MI.getOperand(5).getImm();
10004 DebugLoc DL = MI.getDebugLoc();
10005
10006 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
10007
10008 // ISel pattern matching also adds a load memory operand of the same
10009 // address, so take special care to find the storing memory operand.
10010 MachineMemOperand *MMO = nullptr;
10011 for (auto *I : MI.memoperands())
10012 if (I->isStore()) {
10013 MMO = I;
10014 break;
10015 }
10016
10017 // Use STOCOpcode if possible. We could use different store patterns in
10018 // order to avoid matching the index register, but the performance trade-offs
10019 // might be more complicated in that case.
10020 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
10021 if (Invert)
10022 CCMask ^= CCValid;
10023
10024 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
10025 .addReg(SrcReg)
10026 .add(Base)
10027 .addImm(Disp)
10028 .addImm(CCValid)
10029 .addImm(CCMask)
10030 .addMemOperand(MMO);
10031
10032 MI.eraseFromParent();
10033 return MBB;
10034 }
10035
10036 // Get the condition needed to branch around the store.
10037 if (!Invert)
10038 CCMask ^= CCValid;
10039
10040 MachineBasicBlock *StartMBB = MBB;
10041 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
10042 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
10043
10044 // Unless CC was killed in the CondStore instruction, mark it as
10045 // live-in to both FalseMBB and JoinMBB.
10046 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
10047 !checkCCKill(MI, JoinMBB)) {
10048 FalseMBB->addLiveIn(SystemZ::CC);
10049 JoinMBB->addLiveIn(SystemZ::CC);
10050 }
10051
10052 // StartMBB:
10053 // BRC CCMask, JoinMBB
10054 // # fallthrough to FalseMBB
10055 MBB = StartMBB;
10056 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10057 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
10058 MBB->addSuccessor(JoinMBB);
10059 MBB->addSuccessor(FalseMBB);
10060
10061 // FalseMBB:
10062 // store %SrcReg, %Disp(%Index,%Base)
10063 // # fallthrough to JoinMBB
10064 MBB = FalseMBB;
10065 BuildMI(MBB, DL, TII->get(StoreOpcode))
10066 .addReg(SrcReg)
10067 .add(Base)
10068 .addImm(Disp)
10069 .addReg(IndexReg)
10070 .addMemOperand(MMO);
10071 MBB->addSuccessor(JoinMBB);
10072
10073 MI.eraseFromParent();
10074 return JoinMBB;
10075}
10076
10077// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
10078MachineBasicBlock *
10079SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
10080                                     MachineBasicBlock *MBB,
10081                                     bool Unsigned) const {
10082 MachineFunction &MF = *MBB->getParent();
10083 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10084 MachineRegisterInfo &MRI = MF.getRegInfo();
10085
10086 // Synthetic instruction to compare 128-bit values.
10087 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
10088 Register Op0 = MI.getOperand(0).getReg();
10089 Register Op1 = MI.getOperand(1).getReg();
10090
10091 MachineBasicBlock *StartMBB = MBB;
10092 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
10093 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
10094
10095 // StartMBB:
10096 //
10097 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
10098 // Swap the inputs to get:
10099 // CC 1 if high(Op0) > high(Op1)
10100 // CC 2 if high(Op0) < high(Op1)
10101 // CC 0 if high(Op0) == high(Op1)
10102 //
10103  // If CC != 0, we're done, so jump over the next instruction.
10104 //
10105 // VEC[L]G Op1, Op0
10106 // JNE JoinMBB
10107 // # fallthrough to HiEqMBB
10108 MBB = StartMBB;
10109 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
10110 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
10111 .addReg(Op1).addReg(Op0);
10112 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
10113    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
10114  MBB->addSuccessor(JoinMBB);
10115 MBB->addSuccessor(HiEqMBB);
10116
10117 // HiEqMBB:
10118 //
10119 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10120 // Since we already know the high parts are equal, the CC
10121 // result will only depend on the low parts:
10122 // CC 1 if low(Op0) > low(Op1)
10123 // CC 3 if low(Op0) <= low(Op1)
10124 //
10125 // VCHLGS Tmp, Op0, Op1
10126 // # fallthrough to JoinMBB
10127 MBB = HiEqMBB;
10128 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10129 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10130 .addReg(Op0).addReg(Op1);
10131 MBB->addSuccessor(JoinMBB);
10132
10133 // Mark CC as live-in to JoinMBB.
10134 JoinMBB->addLiveIn(SystemZ::CC);
10135
10136 MI.eraseFromParent();
10137 return JoinMBB;
10138}
10139
10140// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10141// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10142// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10143// whether the field should be inverted after performing BinOpcode (e.g. for
10144// NAND).
10145MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10146 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10147 bool Invert) const {
10148 MachineFunction &MF = *MBB->getParent();
10149 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10150 MachineRegisterInfo &MRI = MF.getRegInfo();
10151
10152 // Extract the operands. Base can be a register or a frame index.
10153 // Src2 can be a register or immediate.
10154 Register Dest = MI.getOperand(0).getReg();
10155 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10156 int64_t Disp = MI.getOperand(2).getImm();
10157 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10158 Register BitShift = MI.getOperand(4).getReg();
10159 Register NegBitShift = MI.getOperand(5).getReg();
10160 unsigned BitSize = MI.getOperand(6).getImm();
10161 DebugLoc DL = MI.getDebugLoc();
10162
10163 // Get the right opcodes for the displacement.
10164 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10165 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10166 assert(LOpcode && CSOpcode && "Displacement out of range");
10167
10168 // Create virtual registers for temporary results.
10169 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10170 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10171 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10172 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10173 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10174
10175 // Insert a basic block for the main loop.
10176 MachineBasicBlock *StartMBB = MBB;
10177 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10178 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10179
10180 // StartMBB:
10181 // ...
10182 // %OrigVal = L Disp(%Base)
10183 // # fall through to LoopMBB
10184 MBB = StartMBB;
10185 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10186 MBB->addSuccessor(LoopMBB);
10187
10188 // LoopMBB:
10189 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10190 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10191 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10192 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10193 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10194 // JNE LoopMBB
10195 // # fall through to DoneMBB
10196 MBB = LoopMBB;
10197 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10198 .addReg(OrigVal).addMBB(StartMBB)
10199 .addReg(Dest).addMBB(LoopMBB);
10200 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10201 .addReg(OldVal).addReg(BitShift).addImm(0);
10202 if (Invert) {
10203 // Perform the operation normally and then invert every bit of the field.
10204 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10205 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10206 // XILF with the upper BitSize bits set.
10207 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10208 .addReg(Tmp).addImm(-1U << (32 - BitSize));
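    // For BitSize == 8 the immediate is 0xFF000000, so only the field bits
    // (rotated into the high bits by the RLL above) are inverted.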
10209 } else if (BinOpcode)
10210    // A simple binary operation.
10211 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10212 .addReg(RotatedOldVal)
10213 .add(Src2);
10214 else
10215 // Use RISBG to rotate Src2 into position and use it to replace the
10216 // field in RotatedOldVal.
10217 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10218 .addReg(RotatedOldVal).addReg(Src2.getReg())
10219 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10220 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10221 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10222 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10223 .addReg(OldVal)
10224 .addReg(NewVal)
10225 .add(Base)
10226 .addImm(Disp);
10227 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10228    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10229  MBB->addSuccessor(LoopMBB);
10230 MBB->addSuccessor(DoneMBB);
10231
10232 MI.eraseFromParent();
10233 return DoneMBB;
10234}
10235
10236// Implement EmitInstrWithCustomInserter for subword pseudo
10237// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10238// instruction that should be used to compare the current field with the
10239// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10240// for when the current field should be kept.
10241MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10242 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10243 unsigned KeepOldMask) const {
10244 MachineFunction &MF = *MBB->getParent();
10245 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10246 MachineRegisterInfo &MRI = MF.getRegInfo();
10247
10248 // Extract the operands. Base can be a register or a frame index.
10249 Register Dest = MI.getOperand(0).getReg();
10250 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10251 int64_t Disp = MI.getOperand(2).getImm();
10252 Register Src2 = MI.getOperand(3).getReg();
10253 Register BitShift = MI.getOperand(4).getReg();
10254 Register NegBitShift = MI.getOperand(5).getReg();
10255 unsigned BitSize = MI.getOperand(6).getImm();
10256 DebugLoc DL = MI.getDebugLoc();
10257
10258 // Get the right opcodes for the displacement.
10259 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10260 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10261 assert(LOpcode && CSOpcode && "Displacement out of range");
10262
10263 // Create virtual registers for temporary results.
10264 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10265 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10266 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10267 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10268 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10269 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10270
10271 // Insert 3 basic blocks for the loop.
10272 MachineBasicBlock *StartMBB = MBB;
10273 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10274 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10275 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10276 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10277
10278 // StartMBB:
10279 // ...
10280 // %OrigVal = L Disp(%Base)
10281 // # fall through to LoopMBB
10282 MBB = StartMBB;
10283 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10284 MBB->addSuccessor(LoopMBB);
10285
10286 // LoopMBB:
10287 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10288 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10289 // CompareOpcode %RotatedOldVal, %Src2
10290 // BRC KeepOldMask, UpdateMBB
10291 MBB = LoopMBB;
10292 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10293 .addReg(OrigVal).addMBB(StartMBB)
10294 .addReg(Dest).addMBB(UpdateMBB);
10295 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10296 .addReg(OldVal).addReg(BitShift).addImm(0);
10297 BuildMI(MBB, DL, TII->get(CompareOpcode))
10298 .addReg(RotatedOldVal).addReg(Src2);
10299 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10300 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10301 MBB->addSuccessor(UpdateMBB);
10302 MBB->addSuccessor(UseAltMBB);
10303
10304 // UseAltMBB:
10305 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10306 // # fall through to UpdateMBB
10307 MBB = UseAltMBB;
10308 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10309 .addReg(RotatedOldVal).addReg(Src2)
10310 .addImm(32).addImm(31 + BitSize).addImm(0);
10311 MBB->addSuccessor(UpdateMBB);
10312
10313 // UpdateMBB:
10314 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10315 // [ %RotatedAltVal, UseAltMBB ]
10316 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10317 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10318 // JNE LoopMBB
10319 // # fall through to DoneMBB
10320 MBB = UpdateMBB;
10321 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10322 .addReg(RotatedOldVal).addMBB(LoopMBB)
10323 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10324 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10325 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10326 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10327 .addReg(OldVal)
10328 .addReg(NewVal)
10329 .add(Base)
10330 .addImm(Disp);
10331 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10332    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10333  MBB->addSuccessor(LoopMBB);
10334 MBB->addSuccessor(DoneMBB);
10335
10336 MI.eraseFromParent();
10337 return DoneMBB;
10338}
10339
10340// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10341// instruction MI.
10342MachineBasicBlock *
10343SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10344 MachineBasicBlock *MBB) const {
10345 MachineFunction &MF = *MBB->getParent();
10346 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10347 MachineRegisterInfo &MRI = MF.getRegInfo();
10348
10349 // Extract the operands. Base can be a register or a frame index.
10350 Register Dest = MI.getOperand(0).getReg();
10351 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10352 int64_t Disp = MI.getOperand(2).getImm();
10353 Register CmpVal = MI.getOperand(3).getReg();
10354 Register OrigSwapVal = MI.getOperand(4).getReg();
10355 Register BitShift = MI.getOperand(5).getReg();
10356 Register NegBitShift = MI.getOperand(6).getReg();
10357 int64_t BitSize = MI.getOperand(7).getImm();
10358 DebugLoc DL = MI.getDebugLoc();
10359
10360 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10361
10362 // Get the right opcodes for the displacement and zero-extension.
10363 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10364 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10365 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10366 assert(LOpcode && CSOpcode && "Displacement out of range");
10367
10368 // Create virtual registers for temporary results.
10369 Register OrigOldVal = MRI.createVirtualRegister(RC);
10370 Register OldVal = MRI.createVirtualRegister(RC);
10371 Register SwapVal = MRI.createVirtualRegister(RC);
10372 Register StoreVal = MRI.createVirtualRegister(RC);
10373 Register OldValRot = MRI.createVirtualRegister(RC);
10374 Register RetryOldVal = MRI.createVirtualRegister(RC);
10375 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10376
10377 // Insert 2 basic blocks for the loop.
10378 MachineBasicBlock *StartMBB = MBB;
10379 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10380 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10381 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10382
10383 // StartMBB:
10384 // ...
10385 // %OrigOldVal = L Disp(%Base)
10386 // # fall through to LoopMBB
10387 MBB = StartMBB;
10388 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10389 .add(Base)
10390 .addImm(Disp)
10391 .addReg(0);
10392 MBB->addSuccessor(LoopMBB);
10393
10394 // LoopMBB:
10395 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10396 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10397 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10398 // ^^ The low BitSize bits contain the field
10399 // of interest.
10400 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10401 // ^^ Replace the upper 32-BitSize bits of the
10402 // swap value with those that we loaded and rotated.
10403 // %Dest = LL[CH] %OldValRot
10404 // CR %Dest, %CmpVal
10405 // JNE DoneMBB
10406 // # Fall through to SetMBB
10407 MBB = LoopMBB;
10408 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10409 .addReg(OrigOldVal).addMBB(StartMBB)
10410 .addReg(RetryOldVal).addMBB(SetMBB);
10411 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10412 .addReg(OrigSwapVal).addMBB(StartMBB)
10413 .addReg(RetrySwapVal).addMBB(SetMBB);
10414 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10415 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10416 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10417 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10418 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10419 .addReg(OldValRot);
10420 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10421 .addReg(Dest).addReg(CmpVal);
10422 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10423    .addImm(SystemZ::CCMASK_ICMP)
10424    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
10425  MBB->addSuccessor(DoneMBB);
10426 MBB->addSuccessor(SetMBB);
10427
10428 // SetMBB:
10429 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10430 // ^^ Rotate the new field to its proper position.
10431 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10432 // JNE LoopMBB
10433 // # fall through to ExitMBB
10434 MBB = SetMBB;
10435 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10436 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10437 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10438 .addReg(OldVal)
10439 .addReg(StoreVal)
10440 .add(Base)
10441 .addImm(Disp);
10442 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10443    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10444  MBB->addSuccessor(LoopMBB);
10445 MBB->addSuccessor(DoneMBB);
10446
10447 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10448 // to the block after the loop. At this point, CC may have been defined
10449 // either by the CR in LoopMBB or by the CS in SetMBB.
10450 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10451 DoneMBB->addLiveIn(SystemZ::CC);
10452
10453 MI.eraseFromParent();
10454 return DoneMBB;
10455}
10456
10457// Emit a move from two GR64s to a GR128.
10458MachineBasicBlock *
10459SystemZTargetLowering::emitPair128(MachineInstr &MI,
10460 MachineBasicBlock *MBB) const {
10461 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10462 const DebugLoc &DL = MI.getDebugLoc();
10463
10464 Register Dest = MI.getOperand(0).getReg();
10465 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10466 .add(MI.getOperand(1))
10467 .addImm(SystemZ::subreg_h64)
10468 .add(MI.getOperand(2))
10469 .addImm(SystemZ::subreg_l64);
10470 MI.eraseFromParent();
10471 return MBB;
10472}
10473
10474// Emit an extension from a GR64 to a GR128. ClearEven is true
10475// if the high register of the GR128 value must be cleared or false if
10476// it's "don't care".
10477MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10478                                                     MachineBasicBlock *MBB,
10479                                                     bool ClearEven) const {
10480 MachineFunction &MF = *MBB->getParent();
10481 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10482 MachineRegisterInfo &MRI = MF.getRegInfo();
10483 DebugLoc DL = MI.getDebugLoc();
10484
10485 Register Dest = MI.getOperand(0).getReg();
10486 Register Src = MI.getOperand(1).getReg();
10487 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10488
10489 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10490 if (ClearEven) {
10491 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10492 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10493
10494 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10495 .addImm(0);
10496 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10497 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10498 In128 = NewIn128;
10499 }
10500 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10501 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10502
10503 MI.eraseFromParent();
10504 return MBB;
10505}
10506
10507MachineBasicBlock *
10508SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10509                                         MachineBasicBlock *MBB,
10510                                         unsigned Opcode, bool IsMemset) const {
10511 MachineFunction &MF = *MBB->getParent();
10512 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10513 MachineRegisterInfo &MRI = MF.getRegInfo();
10514 DebugLoc DL = MI.getDebugLoc();
10515
10516 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10517 uint64_t DestDisp = MI.getOperand(1).getImm();
10518 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10519 uint64_t SrcDisp;
10520
10521 // Fold the displacement Disp if it is out of range.
10522 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10523 if (!isUInt<12>(Disp)) {
10524 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10525 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10526 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10527 .add(Base).addImm(Disp).addReg(0);
10528      Base = MachineOperand::CreateReg(Reg, false);
10529      Disp = 0;
10530 }
10531 };
10532
10533 if (!IsMemset) {
10534 SrcBase = earlyUseOperand(MI.getOperand(2));
10535 SrcDisp = MI.getOperand(3).getImm();
10536 } else {
10537 SrcBase = DestBase;
10538 SrcDisp = DestDisp++;
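    // The MVC source is thus the byte just below the destination: storing the
    // memset value once and then copying with overlapping operands propagates
    // it through the rest of the range.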
10539 foldDisplIfNeeded(DestBase, DestDisp);
10540 }
10541
10542 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10543 bool IsImmForm = LengthMO.isImm();
10544 bool IsRegForm = !IsImmForm;
10545
10546 // Build and insert one Opcode of Length, with special treatment for memset.
10547 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10548                            MachineBasicBlock::iterator InsPos,
10549                            MachineOperand DBase, uint64_t DDisp,
10550 MachineOperand SBase, uint64_t SDisp,
10551 unsigned Length) -> void {
10552 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10553 if (IsMemset) {
10554 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10555 if (ByteMO.isImm())
10556 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10557 .add(SBase).addImm(SDisp).add(ByteMO);
10558 else
10559 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10560 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10561 if (--Length == 0)
10562 return;
10563 }
10564 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10565 .add(DBase).addImm(DDisp).addImm(Length)
10566 .add(SBase).addImm(SDisp)
10567 .setMemRefs(MI.memoperands());
10568 };
10569
10570 bool NeedsLoop = false;
10571 uint64_t ImmLength = 0;
10572 Register LenAdjReg = SystemZ::NoRegister;
10573 if (IsImmForm) {
10574 ImmLength = LengthMO.getImm();
10575 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10576 if (ImmLength == 0) {
10577 MI.eraseFromParent();
10578 return MBB;
10579 }
10580 if (Opcode == SystemZ::CLC) {
10581 if (ImmLength > 3 * 256)
10582 // A two-CLC sequence is a clear win over a loop, not least because
10583 // it needs only one branch. A three-CLC sequence needs the same
10584 // number of branches as a loop (i.e. 2), but is shorter. That
10585 // brings us to lengths greater than 768 bytes. It seems relatively
10586 // likely that a difference will be found within the first 768 bytes,
10587 // so we just optimize for the smallest number of branch
10588 // instructions, in order to avoid polluting the prediction buffer
10589 // too much.
10590 NeedsLoop = true;
10591 } else if (ImmLength > 6 * 256)
10592 // The heuristic we use is to prefer loops for anything that would
10593 // require 7 or more MVCs. With these kinds of sizes there isn't much
10594 // to choose between straight-line code and looping code, since the
10595 // time will be dominated by the MVCs themselves.
10596 NeedsLoop = true;
10597 } else {
10598 NeedsLoop = true;
10599 LenAdjReg = LengthMO.getReg();
10600 }
10601
10602 // When generating more than one CLC, all but the last will need to
10603 // branch to the end when a difference is found.
10604 MachineBasicBlock *EndMBB =
10605 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10606           ? SystemZ::splitBlockAfter(MI, MBB)
10607           : nullptr);
10608
10609 if (NeedsLoop) {
10610 Register StartCountReg =
10611 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10612 if (IsImmForm) {
10613 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10614 ImmLength &= 255;
10615 } else {
10616 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10617 .addReg(LenAdjReg)
10618 .addReg(0)
10619 .addImm(8);
10620 }
10621
10622 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10623 auto loadZeroAddress = [&]() -> MachineOperand {
10624 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10625 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10626 return MachineOperand::CreateReg(Reg, false);
10627 };
10628 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10629 DestBase = loadZeroAddress();
10630 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10631 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10632
10633 MachineBasicBlock *StartMBB = nullptr;
10634 MachineBasicBlock *LoopMBB = nullptr;
10635 MachineBasicBlock *NextMBB = nullptr;
10636 MachineBasicBlock *DoneMBB = nullptr;
10637 MachineBasicBlock *AllDoneMBB = nullptr;
10638
10639 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10640 Register StartDestReg =
10641 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10642
10643 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10644 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10645 Register ThisDestReg =
10646 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10647 Register NextSrcReg = MRI.createVirtualRegister(RC);
10648 Register NextDestReg =
10649 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10650 RC = &SystemZ::GR64BitRegClass;
10651 Register ThisCountReg = MRI.createVirtualRegister(RC);
10652 Register NextCountReg = MRI.createVirtualRegister(RC);
10653
10654 if (IsRegForm) {
10655 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10656 StartMBB = SystemZ::emitBlockAfter(MBB);
10657 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10658 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10659 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10660
10661 // MBB:
10662 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10663 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10664 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10665 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10666        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10667        .addMBB(AllDoneMBB);
10668 MBB->addSuccessor(AllDoneMBB);
10669 if (!IsMemset)
10670 MBB->addSuccessor(StartMBB);
10671 else {
10672 // MemsetOneCheckMBB:
10673 // # Jump to MemsetOneMBB for a memset of length 1, or
10674 // # fall thru to StartMBB.
10675 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10676 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10677 MBB->addSuccessor(MemsetOneCheckMBB);
10678 MBB = MemsetOneCheckMBB;
10679 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10680 .addReg(LenAdjReg).addImm(-1);
10681 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10682          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10683          .addMBB(MemsetOneMBB);
10684 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10685 MBB->addSuccessor(StartMBB, {90, 100});
10686
10687 // MemsetOneMBB:
10688 // # Jump back to AllDoneMBB after a single MVI or STC.
10689 MBB = MemsetOneMBB;
10690 insertMemMemOp(MBB, MBB->end(),
10691 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10692 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10693 1);
10694 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10695 MBB->addSuccessor(AllDoneMBB);
10696 }
10697
10698 // StartMBB:
10699 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10700 MBB = StartMBB;
10701 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10702 .addReg(StartCountReg).addImm(0);
10703 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10704        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10705        .addMBB(DoneMBB);
10706 MBB->addSuccessor(DoneMBB);
10707 MBB->addSuccessor(LoopMBB);
10708 }
10709 else {
10710 StartMBB = MBB;
10711 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10712 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10713 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10714
10715 // StartMBB:
10716 // # fall through to LoopMBB
10717 MBB->addSuccessor(LoopMBB);
10718
10719 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10720 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10721 if (EndMBB && !ImmLength)
10722 // If the loop handled the whole CLC range, DoneMBB will be empty with
10723 // CC live-through into EndMBB, so add it as live-in.
10724 DoneMBB->addLiveIn(SystemZ::CC);
10725 }
10726
10727 // LoopMBB:
10728 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10729 // [ %NextDestReg, NextMBB ]
10730 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10731 // [ %NextSrcReg, NextMBB ]
10732 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10733 // [ %NextCountReg, NextMBB ]
10734 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10735 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10736 // ( JLH EndMBB )
10737 //
10738 // The prefetch is used only for MVC. The JLH is used only for CLC.
10739 MBB = LoopMBB;
10740 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10741 .addReg(StartDestReg).addMBB(StartMBB)
10742 .addReg(NextDestReg).addMBB(NextMBB);
10743 if (!HaveSingleBase)
10744 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10745 .addReg(StartSrcReg).addMBB(StartMBB)
10746 .addReg(NextSrcReg).addMBB(NextMBB);
10747 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10748 .addReg(StartCountReg).addMBB(StartMBB)
10749 .addReg(NextCountReg).addMBB(NextMBB);
10750 if (Opcode == SystemZ::MVC)
10751 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10752        .addImm(SystemZ::PFD_WRITE)
10753        .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10754 insertMemMemOp(MBB, MBB->end(),
10755 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10756 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10757 if (EndMBB) {
10758 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10759        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10760        .addMBB(EndMBB);
10761 MBB->addSuccessor(EndMBB);
10762 MBB->addSuccessor(NextMBB);
10763 }
10764
10765 // NextMBB:
10766 // %NextDestReg = LA 256(%ThisDestReg)
10767 // %NextSrcReg = LA 256(%ThisSrcReg)
10768 // %NextCountReg = AGHI %ThisCountReg, -1
10769 // CGHI %NextCountReg, 0
10770 // JLH LoopMBB
10771 // # fall through to DoneMBB
10772 //
10773 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10774 MBB = NextMBB;
10775 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10776 .addReg(ThisDestReg).addImm(256).addReg(0);
10777 if (!HaveSingleBase)
10778 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10779 .addReg(ThisSrcReg).addImm(256).addReg(0);
10780 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10781 .addReg(ThisCountReg).addImm(-1);
10782 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10783 .addReg(NextCountReg).addImm(0);
10784 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10785      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10786      .addMBB(LoopMBB);
10787 MBB->addSuccessor(LoopMBB);
10788 MBB->addSuccessor(DoneMBB);
10789
10790 MBB = DoneMBB;
10791 if (IsRegForm) {
10792 // DoneMBB:
10793 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10794 // # Use EXecute Relative Long for the remainder of the bytes. The target
10795 // instruction of the EXRL will have a length field of 1 since 0 is an
10796 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10797 // 0xff) + 1.
10798 // # Fall through to AllDoneMBB.
10799 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10800 Register RemDestReg = HaveSingleBase ? RemSrcReg
10801 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10802 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10803 .addReg(StartDestReg).addMBB(StartMBB)
10804 .addReg(NextDestReg).addMBB(NextMBB);
10805 if (!HaveSingleBase)
10806 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10807 .addReg(StartSrcReg).addMBB(StartMBB)
10808 .addReg(NextSrcReg).addMBB(NextMBB);
10809 if (IsMemset)
10810 insertMemMemOp(MBB, MBB->end(),
10811 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10812 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10813 MachineInstrBuilder EXRL_MIB =
10814 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10815 .addImm(Opcode)
10816 .addReg(LenAdjReg)
10817 .addReg(RemDestReg).addImm(DestDisp)
10818 .addReg(RemSrcReg).addImm(SrcDisp);
10819 MBB->addSuccessor(AllDoneMBB);
10820 MBB = AllDoneMBB;
10821 if (Opcode != SystemZ::MVC) {
10822 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10823 if (EndMBB)
10824 MBB->addLiveIn(SystemZ::CC);
10825 }
10826 }
10827 MF.getProperties().resetNoPHIs();
10828 }
10829
10830 // Handle any remaining bytes with straight-line code.
10831 while (ImmLength > 0) {
10832 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10833 // The previous iteration might have created out-of-range displacements.
10834 // Apply them using LA/LAY if so.
10835 foldDisplIfNeeded(DestBase, DestDisp);
10836 foldDisplIfNeeded(SrcBase, SrcDisp);
10837 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10838 DestDisp += ThisLength;
10839 SrcDisp += ThisLength;
10840 ImmLength -= ThisLength;
10841 // If there's another CLC to go, branch to the end if a difference
10842 // was found.
10843 if (EndMBB && ImmLength > 0) {
10844 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10845 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10846        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10847        .addMBB(EndMBB);
10848 MBB->addSuccessor(EndMBB);
10849 MBB->addSuccessor(NextMBB);
10850 MBB = NextMBB;
10851 }
10852 }
10853 if (EndMBB) {
10854 MBB->addSuccessor(EndMBB);
10855 MBB = EndMBB;
10856 MBB->addLiveIn(SystemZ::CC);
10857 }
10858
10859 MI.eraseFromParent();
10860 return MBB;
10861}
10862
10863// Decompose string pseudo-instruction MI into a loop that continually performs
10864// Opcode until CC != 3.
10865MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10866 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10867 MachineFunction &MF = *MBB->getParent();
10868 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10869 MachineRegisterInfo &MRI = MF.getRegInfo();
10870 DebugLoc DL = MI.getDebugLoc();
10871
10872 uint64_t End1Reg = MI.getOperand(0).getReg();
10873 uint64_t Start1Reg = MI.getOperand(1).getReg();
10874 uint64_t Start2Reg = MI.getOperand(2).getReg();
10875 uint64_t CharReg = MI.getOperand(3).getReg();
10876
10877 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10878 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10879 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10880 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10881
10882 MachineBasicBlock *StartMBB = MBB;
10883 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10884 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10885
10886 // StartMBB:
10887 // # fall through to LoopMBB
10888 MBB->addSuccessor(LoopMBB);
10889
10890 // LoopMBB:
10891 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10892 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10893 // R0L = %CharReg
10894 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10895 // JO LoopMBB
10896 // # fall through to DoneMBB
10897 //
10898 // The load of R0L can be hoisted by post-RA LICM.
10899 MBB = LoopMBB;
10900
10901 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10902 .addReg(Start1Reg).addMBB(StartMBB)
10903 .addReg(End1Reg).addMBB(LoopMBB);
10904 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10905 .addReg(Start2Reg).addMBB(StartMBB)
10906 .addReg(End2Reg).addMBB(LoopMBB);
10907 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10908 BuildMI(MBB, DL, TII->get(Opcode))
10909 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10910 .addReg(This1Reg).addReg(This2Reg);
10911 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10912    .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
10913  MBB->addSuccessor(LoopMBB);
10914 MBB->addSuccessor(DoneMBB);
10915
10916 DoneMBB->addLiveIn(SystemZ::CC);
10917
10918 MI.eraseFromParent();
10919 return DoneMBB;
10920}
10921
10922// Update TBEGIN instruction with final opcode and register clobbers.
10923MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10924 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10925 bool NoFloat) const {
10926 MachineFunction &MF = *MBB->getParent();
10927 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10928 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10929
10930 // Update opcode.
10931 MI.setDesc(TII->get(Opcode));
10932
10933 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10934 // Make sure to add the corresponding GRSM bits if they are missing.
10935 uint64_t Control = MI.getOperand(2).getImm();
10936 static const unsigned GPRControlBit[16] = {
10937 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10938 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10939 };
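  // Adjacent entries are identical because each GRSM bit covers an even/odd
  // GPR pair (r0-r1, r2-r3, ...).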
10940 Control |= GPRControlBit[15];
10941 if (TFI->hasFP(MF))
10942 Control |= GPRControlBit[11];
10943 MI.getOperand(2).setImm(Control);
10944
10945 // Add GPR clobbers.
10946 for (int I = 0; I < 16; I++) {
10947 if ((Control & GPRControlBit[I]) == 0) {
10948 unsigned Reg = SystemZMC::GR64Regs[I];
10949 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10950 }
10951 }
10952
10953 // Add FPR/VR clobbers.
10954 if (!NoFloat && (Control & 4) != 0) {
10955 if (Subtarget.hasVector()) {
10956 for (unsigned Reg : SystemZMC::VR128Regs) {
10957 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10958 }
10959 } else {
10960 for (unsigned Reg : SystemZMC::FP64Regs) {
10961 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10962 }
10963 }
10964 }
10965
10966 return MBB;
10967}
10968
10969MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10970 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10971 MachineFunction &MF = *MBB->getParent();
10972 MachineRegisterInfo *MRI = &MF.getRegInfo();
10973 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10974 DebugLoc DL = MI.getDebugLoc();
10975
10976 Register SrcReg = MI.getOperand(0).getReg();
10977
10978 // Create new virtual register of the same class as source.
10979 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10980 Register DstReg = MRI->createVirtualRegister(RC);
10981
10982 // Replace pseudo with a normal load-and-test that models the def as
10983 // well.
10984 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10985 .addReg(SrcReg)
10986 .setMIFlags(MI.getFlags());
10987 MI.eraseFromParent();
10988
10989 return MBB;
10990}
10991
10992MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10993    MachineInstr &MI, MachineBasicBlock *MBB) const {
10994  MachineFunction &MF = *MBB->getParent();
10995 MachineRegisterInfo *MRI = &MF.getRegInfo();
10996 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10997 DebugLoc DL = MI.getDebugLoc();
10998 const unsigned ProbeSize = getStackProbeSize(MF);
10999 Register DstReg = MI.getOperand(0).getReg();
11000 Register SizeReg = MI.getOperand(2).getReg();
11001
11002 MachineBasicBlock *StartMBB = MBB;
11003 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
11004 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
11005 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
11006 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
11007 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
11008
11009 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
11010    MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
11011
11012 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11013 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11014
11015 // LoopTestMBB
11016 // BRC TailTestMBB
11017 // # fallthrough to LoopBodyMBB
11018 StartMBB->addSuccessor(LoopTestMBB);
11019 MBB = LoopTestMBB;
11020 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
11021 .addReg(SizeReg)
11022 .addMBB(StartMBB)
11023 .addReg(IncReg)
11024 .addMBB(LoopBodyMBB);
11025 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
11026 .addReg(PHIReg)
11027 .addImm(ProbeSize);
11028 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11029    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
11030    .addMBB(TailTestMBB);
11031 MBB->addSuccessor(LoopBodyMBB);
11032 MBB->addSuccessor(TailTestMBB);
11033
11034 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
11035 // J LoopTestMBB
11036 MBB = LoopBodyMBB;
11037 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
11038 .addReg(PHIReg)
11039 .addImm(ProbeSize);
11040 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
11041 .addReg(SystemZ::R15D)
11042 .addImm(ProbeSize);
11043 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11044 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
11045 .setMemRefs(VolLdMMO);
11046 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
11047 MBB->addSuccessor(LoopTestMBB);
11048
11049 // TailTestMBB
11050 // BRC DoneMBB
11051 // # fallthrough to TailMBB
11052 MBB = TailTestMBB;
11053 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
11054 .addReg(PHIReg)
11055 .addImm(0);
11056 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11057    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
11058    .addMBB(DoneMBB);
11059 MBB->addSuccessor(TailMBB);
11060 MBB->addSuccessor(DoneMBB);
11061
11062 // TailMBB
11063 // # fallthrough to DoneMBB
11064 MBB = TailMBB;
11065 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
11066 .addReg(SystemZ::R15D)
11067 .addReg(PHIReg);
11068 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11069 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
11070 .setMemRefs(VolLdMMO);
11071 MBB->addSuccessor(DoneMBB);
11072
11073 // DoneMBB
11074 MBB = DoneMBB;
11075 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
11076 .addReg(SystemZ::R15D);
11077
11078 MI.eraseFromParent();
11079 return DoneMBB;
11080}
11081
11082SDValue SystemZTargetLowering::
11083getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
11084 MachineFunction &MF = DAG.getMachineFunction();
11085 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
11086 SDLoc DL(SP);
11087 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
11088 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
11089}
11090
11091MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
11092    MachineInstr &MI, MachineBasicBlock *MBB) const {
11093  switch (MI.getOpcode()) {
11094 case SystemZ::ADJCALLSTACKDOWN:
11095 case SystemZ::ADJCALLSTACKUP:
11096 return emitAdjCallStack(MI, MBB);
11097
11098 case SystemZ::Select32:
11099 case SystemZ::Select64:
11100 case SystemZ::Select128:
11101 case SystemZ::SelectF32:
11102 case SystemZ::SelectF64:
11103 case SystemZ::SelectF128:
11104 case SystemZ::SelectVR32:
11105 case SystemZ::SelectVR64:
11106 case SystemZ::SelectVR128:
11107 return emitSelect(MI, MBB);
11108
11109 case SystemZ::CondStore8Mux:
11110 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
11111 case SystemZ::CondStore8MuxInv:
11112 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
11113 case SystemZ::CondStore16Mux:
11114 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
11115 case SystemZ::CondStore16MuxInv:
11116 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11117 case SystemZ::CondStore32Mux:
11118 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11119 case SystemZ::CondStore32MuxInv:
11120 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11121 case SystemZ::CondStore8:
11122 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11123 case SystemZ::CondStore8Inv:
11124 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11125 case SystemZ::CondStore16:
11126 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11127 case SystemZ::CondStore16Inv:
11128 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11129 case SystemZ::CondStore32:
11130 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11131 case SystemZ::CondStore32Inv:
11132 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11133 case SystemZ::CondStore64:
11134 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11135 case SystemZ::CondStore64Inv:
11136 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11137 case SystemZ::CondStoreF32:
11138 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11139 case SystemZ::CondStoreF32Inv:
11140 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11141 case SystemZ::CondStoreF64:
11142 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11143 case SystemZ::CondStoreF64Inv:
11144 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11145
11146 case SystemZ::SCmp128Hi:
11147 return emitICmp128Hi(MI, MBB, false);
11148 case SystemZ::UCmp128Hi:
11149 return emitICmp128Hi(MI, MBB, true);
11150
11151 case SystemZ::PAIR128:
11152 return emitPair128(MI, MBB);
11153 case SystemZ::AEXT128:
11154 return emitExt128(MI, MBB, false);
11155 case SystemZ::ZEXT128:
11156 return emitExt128(MI, MBB, true);
11157
11158 case SystemZ::ATOMIC_SWAPW:
11159 return emitAtomicLoadBinary(MI, MBB, 0);
11160
11161 case SystemZ::ATOMIC_LOADW_AR:
11162 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11163 case SystemZ::ATOMIC_LOADW_AFI:
11164 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11165
11166 case SystemZ::ATOMIC_LOADW_SR:
11167 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11168
11169 case SystemZ::ATOMIC_LOADW_NR:
11170 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11171 case SystemZ::ATOMIC_LOADW_NILH:
11172 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11173
11174 case SystemZ::ATOMIC_LOADW_OR:
11175 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11176 case SystemZ::ATOMIC_LOADW_OILH:
11177 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11178
11179 case SystemZ::ATOMIC_LOADW_XR:
11180 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11181 case SystemZ::ATOMIC_LOADW_XILF:
11182 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11183
11184 case SystemZ::ATOMIC_LOADW_NRi:
11185 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11186 case SystemZ::ATOMIC_LOADW_NILHi:
11187 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11188
11189 case SystemZ::ATOMIC_LOADW_MIN:
11190 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11191 case SystemZ::ATOMIC_LOADW_MAX:
11192 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11193 case SystemZ::ATOMIC_LOADW_UMIN:
11194 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11195 case SystemZ::ATOMIC_LOADW_UMAX:
11196 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11197
11198 case SystemZ::ATOMIC_CMP_SWAPW:
11199 return emitAtomicCmpSwapW(MI, MBB);
11200 case SystemZ::MVCImm:
11201 case SystemZ::MVCReg:
11202 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11203 case SystemZ::NCImm:
11204 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11205 case SystemZ::OCImm:
11206 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11207 case SystemZ::XCImm:
11208 case SystemZ::XCReg:
11209 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11210 case SystemZ::CLCImm:
11211 case SystemZ::CLCReg:
11212 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11213 case SystemZ::MemsetImmImm:
11214 case SystemZ::MemsetImmReg:
11215 case SystemZ::MemsetRegImm:
11216 case SystemZ::MemsetRegReg:
11217 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
11218 case SystemZ::CLSTLoop:
11219 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11220 case SystemZ::MVSTLoop:
11221 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11222 case SystemZ::SRSTLoop:
11223 return emitStringWrapper(MI, MBB, SystemZ::SRST);
11224 case SystemZ::TBEGIN:
11225 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11226 case SystemZ::TBEGIN_nofloat:
11227 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11228 case SystemZ::TBEGINC:
11229 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11230 case SystemZ::LTEBRCompare_Pseudo:
11231 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11232 case SystemZ::LTDBRCompare_Pseudo:
11233 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11234 case SystemZ::LTXBRCompare_Pseudo:
11235 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11236
11237 case SystemZ::PROBED_ALLOCA:
11238 return emitProbedAlloca(MI, MBB);
11239 case SystemZ::EH_SjLj_SetJmp:
11240 return emitEHSjLjSetJmp(MI, MBB);
11241 case SystemZ::EH_SjLj_LongJmp:
11242 return emitEHSjLjLongJmp(MI, MBB);
11243
11244 case TargetOpcode::STACKMAP:
11245 case TargetOpcode::PATCHPOINT:
11246 return emitPatchPoint(MI, MBB);
11247
11248 default:
11249 llvm_unreachable("Unexpected instr type to insert");
11250 }
11251}
11252
11253// This is only used by the isel schedulers, and is needed only to prevent
11254// the compiler from crashing when list-ilp is used.
11255const TargetRegisterClass *
11256SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11257 if (VT == MVT::Untyped)
11258 return &SystemZ::ADDR128BitRegClass;
11259 return TargetLowering::getRepRegClassFor(VT);
11260}
11261
11262SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11263 SelectionDAG &DAG) const {
11264 SDLoc dl(Op);
11265 /*
11266 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11267 settings:
11268 00 Round to nearest
11269 01 Round to 0
11270 10 Round to +inf
11271 11 Round to -inf
11272
11273 FLT_ROUNDS, on the other hand, expects the following:
11274 -1 Undefined
11275 0 Round to 0
11276 1 Round to nearest
11277 2 Round to +inf
11278 3 Round to -inf
11279 */
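// Working the transform below through all four FPC values confirms the
// mapping: 00 -> (0^0)^1 = 1 (nearest), 01 -> (1^0)^1 = 0 (toward zero),
// 10 -> (2^1)^1 = 2 (+inf), 11 -> (3^1)^1 = 3 (-inf).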
11280
11281 // Save FPC to register.
11282 SDValue Chain = Op.getOperand(0);
11283 SDValue EFPC(
11284 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11285 Chain = EFPC.getValue(1);
11286
11287 // Transform as necessary
11288 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11289 DAG.getConstant(3, dl, MVT::i32));
11290 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11291 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11292 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11293 DAG.getConstant(1, dl, MVT::i32)));
11294
11295 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11296 DAG.getConstant(1, dl, MVT::i32));
11297 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11298
11299 return DAG.getMergeValues({RetVal, Chain}, dl);
11300}
11301
11302SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11303 SelectionDAG &DAG) const {
11304 EVT VT = Op.getValueType();
11305 Op = Op.getOperand(0);
11306 EVT OpVT = Op.getValueType();
11307
11308 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11309
11310 SDLoc DL(Op);
11311
11312 // Load a zero vector for the third operand of VSUM.
11313 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11314
11315 // Execute VSUM.
11316 switch (OpVT.getScalarSizeInBits()) {
11317 case 8:
11318 case 16:
11319 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11320 [[fallthrough]];
11321 case 32:
11322 case 64:
11323 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11324 DAG.getBitcast(Op.getValueType(), Zero));
11325 break;
11326 case 128:
11327 break; // VSUM over v1i128 should not happen and would be a noop
11328 default:
11329 llvm_unreachable("Unexpected scalar size.");
11330 }
11331 // Cast to original vector type, retrieve last element.
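// (The lane index NumElements-1 is the low-order element on this big-endian
// target, which is where the value of the 128-bit VSUM result lands.)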
11332 return DAG.getNode(
11333 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11334 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11335}
11336
11337static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
11338 FunctionType *FT = F->getFunctionType();
11339 const AttributeList &Attrs = F->getAttributes();
11340 if (Attrs.hasRetAttrs())
11341 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11342 OS << *F->getReturnType() << " @" << F->getName() << "(";
11343 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11344 if (I)
11345 OS << ", ";
11346 OS << *FT->getParamType(I);
11347 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11348 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11349 if (ArgAttrs.hasAttribute(A))
11350 OS << " " << Attribute::getNameFromAttrKind(A);
11351 }
11352 OS << ")\n";
11353}
11354
11355bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11356 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11357 if (Itr == IsInternalCache.end())
11358 Itr = IsInternalCache
11359 .insert(std::pair<const Function *, bool>(
11360 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11361 .first;
11362 return Itr->second;
11363}
11364
11365void SystemZTargetLowering::
11366verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11367 const Function *F, SDValue Callee) const {
11368 // Temporarily only do the check when explicitly requested, until it can be
11369 // enabled by default.
11370 if (!EnableIntArgExtCheck)
11371 return;
11372
11373 bool IsInternal = false;
11374 const Function *CalleeFn = nullptr;
11375 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11376 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11377 IsInternal = isInternal(CalleeFn);
11378 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11379 errs() << "ERROR: Missing extension attribute of passed "
11380 << "value in call to function:\n" << "Callee: ";
11381 if (CalleeFn != nullptr)
11382 printFunctionArgExts(CalleeFn, errs());
11383 else
11384 errs() << "-\n";
11385 errs() << "Caller: ";
11386 printFunctionArgExts(F, errs());
11387 llvm_unreachable("");
11388 }
11389}
11390
11391void SystemZTargetLowering::
11392verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11393 const Function *F) const {
11394 // Temporarily only do the check when explicitly requested, until it can be
11395 // enabled by default.
11396 if (!EnableIntArgExtCheck)
11397 return;
11398
11399 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11400 errs() << "ERROR: Missing extension attribute of returned "
11401 << "value from function:\n";
11402 printFunctionArgExts(F, errs());
11403 llvm_unreachable("");
11404 }
11405}
11406
11407// Verify that narrow integer arguments are extended as required by the ABI.
11408// Return false if an error is found.
11409bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11410 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11411 if (!Subtarget.isTargetELF())
11412 return true;
11413
11414 if (EnableIntArgExtCheck.getNumOccurrences()) {
11415 if (!EnableIntArgExtCheck)
11416 return true;
11417 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11418 return true;
11419
11420 for (unsigned i = 0; i < Outs.size(); ++i) {
11421 MVT VT = Outs[i].VT;
11422 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11423 if (VT.isInteger()) {
11424 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11425 "Unexpected integer argument VT.");
11426 if (VT == MVT::i32 &&
11427 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11428 return false;
11429 }
11430 }
11431
11432 return true;
11433}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static std::pair< SDValue, int > findCCUse(const SDValue &Val)
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:322
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
iterator end()
Definition DenseMap.h:81
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:951
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:215
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:686
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:114
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
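A rough usage sketch; Op, DAG and the chosen libcall are assumptions, not values from this file:
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {Op.getOperand(0)};
  // First element of the pair is the call's result, second is the output chain.
  auto [Result, OutChain] =
      makeLibCall(DAG, RTLIB::SQRT_F128, MVT::f128, Ops, CallOptions, SDLoc(Op));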
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:464
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:463
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
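A small illustration of the two helpers above (the condition code and type are arbitrary):
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Inverse = ISD::getSetCCInverse(CC, MVT::i64);  // !(X < Y) -> SETGE
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);    // (Y op X) -> SETGT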
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
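The four isImm* predicates test whether a 64-bit immediate occupies only a single 16-bit halfword; a sketch with made-up constants:
  bool LL = SystemZ::isImmLL(UINT64_C(0x0000000000001234)); // bits 0-15  -> true
  bool LH = SystemZ::isImmLH(UINT64_C(0x0000000012340000)); // bits 16-31 -> true
  bool HL = SystemZ::isImmHL(UINT64_C(0x0000123400000000)); // bits 32-47 -> true
  bool HH = SystemZ::isImmHH(UINT64_C(0x1234000000000000)); // bits 48-63 -> true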
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
constexpr const char32_t SBase
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:355
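A tiny example of the helper above, with an arbitrary value:
  unsigned Bits = llvm::Log2_32_Ceil(48); // 6, since 64 is the smallest power of two >= 48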
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
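A short illustration with a made-up displacement value:
  bool FitsDisp20 = llvm::isInt<20>(-4096); // true: -4096 fits in a signed 20-bit field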
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:97
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:331
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
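For example:
  int TZ = llvm::countr_zero(0x80u);           // 7: seven trailing zero bits
  int LZ = llvm::countl_zero<uint32_t>(0x80u); // 24: leading zero bits in a 32-bit value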
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:316
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
#define EQ(a, b)
Definition regexec.c:65
#define NC
Definition regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
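A hedged sketch of the EVT queries listed above (Ctx stands for an available LLVMContext and is an assumption):
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4);  // v4i32
  unsigned NumElts = VT.getVectorNumElements(); // 4
  EVT EltVT = VT.getVectorElementType();        // i32
  bool IsFP = VT.isFloatingPoint();             // false
  bool IsInt = VT.isInteger();                  // true, a vector of integers counts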
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
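A minimal KnownBits sketch (widths and known bits chosen for illustration):
  KnownBits Known(8);              // track an 8-bit value; nothing known yet
  Known.Zero.setHighBits(4);       // declare the top four bits known to be zero
  KnownBits Wide = Known.zext(16); // zero extension: the new high bits are known zero too
  APInt Max = Wide.getMaxValue();  // largest value consistent with the known bits (15)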
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
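Taken together, the setters above are normally chained when building a call; a rough sketch in which DAG, DL, Chain, Callee, RetTy and Args are assumptions rather than values from this file:
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setDiscardResult(false);
  // LowerCallTo returns the call's result value and the output chain.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);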
This structure is used to pass arguments to makeLibCall function.