SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
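// For example, an integer equality test is typically represented with
// CCValid = SystemZ::CCMASK_ICMP and CCMask = SystemZ::CCMASK_CMP_EQ, so the
// resulting branch or select fires only for the "equal" condition-code value.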
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
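// Clearing the kill flag ensures that an expanded early use does not claim to
// be the final (killing) use of the register.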
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
128 }
129
130 if (Subtarget.hasVector())
131 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
132 }
133
134 // Compute derived properties from the register classes
135 computeRegisterProperties(Subtarget.getRegisterInfo());
136
137 // Set up special registers.
138 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
139
140 // TODO: It may be better to default to latency-oriented scheduling, however
141 // LLVM's current latency-oriented scheduler can't handle physreg definitions
142 // such as SystemZ has with CC, so set this to the register-pressure
143 // scheduler, because it can.
145
148
150
151 // Instructions are strings of 2-byte aligned 2-byte values.
153 // For performance reasons we prefer 16-byte alignment.
155
156 // Handle operations that are handled in a similar way for all types.
157 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
158 I <= MVT::LAST_FP_VALUETYPE;
159 ++I) {
161 if (isTypeLegal(VT)) {
162 // Lower SET_CC into an IPM-based sequence.
166
167 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
169
170 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
172 setOperationAction(ISD::BR_CC, VT, Custom);
173 }
174 }
175
176 // Expand jump table branches as address arithmetic followed by an
177 // indirect jump.
178 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
179
180 // Expand BRCOND into a BR_CC (see above).
181 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
182
183 // Handle integer types except i128.
184 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
185 I <= MVT::LAST_INTEGER_VALUETYPE;
186 ++I) {
188 if (isTypeLegal(VT) && VT != MVT::i128) {
190
191 // Expand individual DIV and REMs into DIVREMs.
198
199 // Support addition/subtraction with overflow.
202
203 // Support addition/subtraction with carry.
206
207 // Support carry in as value rather than glue.
210
211 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
212 // available, or if the operand is constant.
213 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
214
215 // Use POPCNT on z196 and above.
216 if (Subtarget.hasPopulationCount())
218 else
220
221 // No special instructions for these.
224
225 // Use *MUL_LOHI where possible instead of MULH*.
230
231 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
232 // unsigned on z10 (only z196 and above have native support for
233 // unsigned conversions).
240 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
241 auto OpAction =
242 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
243 setOperationAction(Op, VT, OpAction);
244 }
245 }
246 }
247
248 // Handle i128 if legal.
249 if (isTypeLegal(MVT::i128)) {
250 // No special instructions for these.
257
258 // We may be able to use VSLDB/VSLD/VSRD for these.
261
262 // No special instructions for these before z17.
263 if (!Subtarget.hasVectorEnhancements3()) {
273 } else {
274 // Even if we do have a legal 128-bit multiply, we do not
275 // want 64-bit multiply-high operations to use it.
278 }
279
280 // Support addition/subtraction with carry.
285
286 // Use VPOPCT and add up partial results.
288
289 // Additional instructions available with z17.
290 if (Subtarget.hasVectorEnhancements3()) {
291 setOperationAction(ISD::ABS, MVT::i128, Legal);
292
294 MVT::i128, Legal);
295 }
296 }
297
298 // These need custom handling in order to handle the f16 conversions.
307
308 // Type legalization will convert 8- and 16-bit atomic operations into
309 // forms that operate on i32s (but still keeping the original memory VT).
310 // Lower them into full i32 operations.
311 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
312 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
313 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
314 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
315 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
316 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
317 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
318 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
319 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
320 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
321 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
322
323 // Whether or not i128 is a legal type, we need to custom lower
324 // the atomic operations in order to exploit SystemZ instructions.
325 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
326 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
327 setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
328 setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);
329
330 // Mark sign/zero extending atomic loads as legal, which will make
331 // DAGCombiner fold extensions into atomic loads if possible.
333 {MVT::i8, MVT::i16, MVT::i32}, Legal);
335 {MVT::i8, MVT::i16}, Legal);
337 MVT::i8, Legal);
338
339 // We can use the CC result of compare-and-swap to implement
340 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
341 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
342 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
343 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
344
345 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
346
347 // Traps are legal, as we will convert them to "j .+2".
348 setOperationAction(ISD::TRAP, MVT::Other, Legal);
349
350 // We have native support for a 64-bit CTLZ, via FLOGR.
354
355 // On z17 we have native support for a 64-bit CTTZ.
356 if (Subtarget.hasMiscellaneousExtensions4()) {
360 }
361
362 // On z15 we have native support for a 64-bit CTPOP.
363 if (Subtarget.hasMiscellaneousExtensions3()) {
366 }
367
368 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
370
371 // Expand 128 bit shifts without using a libcall.
375
376 // Also expand 256 bit shifts if i128 is a legal type.
377 if (isTypeLegal(MVT::i128)) {
381 }
382
383 // Handle bitcast from fp128 to i128.
384 if (!isTypeLegal(MVT::i128))
385 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
386
387 // We have native instructions for i8, i16 and i32 extensions, but not i1.
389 for (MVT VT : MVT::integer_valuetypes()) {
393 }
394
395 // Handle the various types of symbolic address.
401
402 // We need to handle dynamic allocations specially because of the
403 // 160-byte area at the bottom of the stack.
404 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
405 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
406
407 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
408 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
409
410 // Handle prefetches with PFD or PFDRL.
411 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
412
413 // Handle readcyclecounter with STCKF.
414 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
415
417 // Assume by default that all vector operations need to be expanded.
418 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
419 if (getOperationAction(Opcode, VT) == Legal)
420 setOperationAction(Opcode, VT, Expand);
421
422 // Likewise all truncating stores and extending loads.
423 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
424 setTruncStoreAction(VT, InnerVT, Expand);
427 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
428 }
429
430 if (isTypeLegal(VT)) {
431 // These operations are legal for anything that can be stored in a
432 // vector register, even if there is no native support for the format
433 // as such. In particular, we can do these for v4f32 even though there
434 // are no specific instructions for that format.
435 setOperationAction(ISD::LOAD, VT, Legal);
436 setOperationAction(ISD::STORE, VT, Legal);
438 setOperationAction(ISD::BITCAST, VT, Legal);
440
441 // Likewise, except that we need to replace the nodes with something
442 // more specific.
445 }
446 }
447
448 // Handle integer vector types.
450 if (isTypeLegal(VT)) {
451 // These operations have direct equivalents.
456 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
460 }
461 if (Subtarget.hasVectorEnhancements3() &&
462 VT != MVT::v16i8 && VT != MVT::v8i16) {
467 }
472 if (Subtarget.hasVectorEnhancements1())
474 else
478
479 // Convert a GPR scalar to a vector by inserting it into element 0.
481
482 // Use a series of unpacks for extensions.
485
486 // Detect shifts/rotates by a scalar amount and convert them into
487 // V*_BY_SCALAR.
492
493 // Add ISD::VECREDUCE_ADD as custom in order to implement
494 // it with VZERO+VSUM
495 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
496
497 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
498 // and inverting the result as necessary.
500
502 Legal);
503 }
504 }
505
506 if (Subtarget.hasVector()) {
507 // There should be no need to check for float types other than v2f64
508 // since <2 x f32> isn't a legal type.
517
526 }
527
528 if (Subtarget.hasVectorEnhancements2()) {
537
546 }
547
548 // Handle floating-point types.
549 if (!useSoftFloat()) {
550 // Promote all f16 operations to float, with some exceptions below.
551 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
552 setOperationAction(Opc, MVT::f16, Promote);
554 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setTruncStoreAction(VT, MVT::f16, Expand);
557 }
558 for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE})
559 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
562 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
564 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
565 setOperationAction(Op, MVT::f16, Legal);
566 }
567
568 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
569 I <= MVT::LAST_FP_VALUETYPE;
570 ++I) {
572 if (isTypeLegal(VT) && VT != MVT::f16) {
573 // We can use FI for FRINT.
574 setOperationAction(ISD::FRINT, VT, Legal);
575
576 // We can use the extended form of FI for other rounding operations.
577 if (Subtarget.hasFPExtension()) {
578 setOperationAction(ISD::FNEARBYINT, VT, Legal);
579 setOperationAction(ISD::FFLOOR, VT, Legal);
580 setOperationAction(ISD::FCEIL, VT, Legal);
581 setOperationAction(ISD::FTRUNC, VT, Legal);
582 setOperationAction(ISD::FROUND, VT, Legal);
583 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
584 }
585
586 // No special instructions for these.
587 setOperationAction(ISD::FSIN, VT, Expand);
588 setOperationAction(ISD::FCOS, VT, Expand);
589 setOperationAction(ISD::FSINCOS, VT, Expand);
591 setOperationAction(ISD::FPOW, VT, Expand);
592
593 // Special treatment.
595
596 // Handle constrained floating-point operations.
605 if (Subtarget.hasFPExtension()) {
612 }
613
614 // Extension from f16 needs libcall.
615 setOperationAction(ISD::FP_EXTEND, VT, Custom);
617 }
618 }
619
620 // Handle floating-point vector types.
621 if (Subtarget.hasVector()) {
622 // Scalar-to-vector conversion is just a subreg.
625
626 // Some insertions and extractions can be done directly but others
627 // need to go via integers.
632
633 // These operations have direct equivalents.
634 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
635 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
636 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
637 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
638 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
639 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
640 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
641 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
642 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
643 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
644 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
645 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
646 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
647 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
648 setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Legal);
649
650 // Handle constrained floating-point operations.
664
669 if (Subtarget.hasVectorEnhancements1()) {
672 }
673 }
674
675 // The vector enhancements facility 1 has instructions for these.
676 if (Subtarget.hasVectorEnhancements1()) {
677 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
678 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
679 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
680 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
681 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
682 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
683 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
684 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
685 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
686 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
687 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
688 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
689 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
690 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
691 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
692
693 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
694 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
695 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
696 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
697
698 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
699 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
700 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
701 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
702
703 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
704 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
705 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
706 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
707
708 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
709 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
710 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
711 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
712
713 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
714 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
715 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
716 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
717
718 // Handle constrained floating-point operations.
732 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
733 MVT::v4f32, MVT::v2f64 }) {
738 }
739 }
740
741 // We only have fused f128 multiply-addition on vector registers.
742 if (!Subtarget.hasVectorEnhancements1()) {
745 }
746
747 // We don't have a copysign instruction on vector registers.
748 if (Subtarget.hasVectorEnhancements1())
750
751 // Needed so that we don't try to implement f128 constant loads using
752 // a load-and-extend of an f80 constant (in cases where the constant
753 // would fit in an f80).
754 for (MVT VT : MVT::fp_valuetypes())
755 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
756
757 // We don't have extending load instructions on vector registers.
758 if (Subtarget.hasVectorEnhancements1()) {
759 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
760 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
761 }
762
763 // Floating-point truncation and stores need to be done separately.
764 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
765 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
766 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
767
768 // We have 64-bit FPR<->GPR moves, but need special handling for
769 // 32-bit forms.
770 if (!Subtarget.hasVector()) {
771 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
772 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
773 }
774
775 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
776 // structure, but VAEND is a no-op.
777 setOperationAction(ISD::VASTART, MVT::Other, Custom);
778 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
779 setOperationAction(ISD::VAEND, MVT::Other, Expand);
780
781 if (Subtarget.isTargetzOS()) {
782 // Handle address space casts between mixed sized pointers.
783 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
784 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
785 }
786
788
789 // Codes for which we want to perform some z-specific combinations.
793 ISD::LOAD,
794 ISD::STORE,
799 ISD::FP_EXTEND,
806 ISD::SRL,
807 ISD::SRA,
808 ISD::MUL,
809 ISD::SDIV,
810 ISD::UDIV,
811 ISD::SREM,
812 ISD::UREM,
815
816 // Handle intrinsics.
819
820 // We're not using SJLJ for exception handling, but the SjLj operations are
821 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
824
825 // We want to use MVC in preference to even a single load/store pair.
826 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
828
829 // The main memset sequence is a byte store followed by an MVC.
830 // Two STC or MV..I stores win over that, but the kind of fused stores
831 // generated by target-independent code don't when the byte value is
832 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
833 // than "STC;MVC". Handle the choice in target-specific code instead.
834 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
836
837 // Default to having -disable-strictnode-mutation on
838 IsStrictFPEnabled = true;
839}
840
842 return Subtarget.hasSoftFloat();
843}
844
846 LLVMContext &, EVT VT) const {
847 if (!VT.isVector())
848 return MVT::i32;
850}
851
853 const MachineFunction &MF, EVT VT) const {
854 if (useSoftFloat())
855 return false;
856
857 VT = VT.getScalarType();
858
859 if (!VT.isSimple())
860 return false;
861
862 switch (VT.getSimpleVT().SimpleTy) {
863 case MVT::f32:
864 case MVT::f64:
865 return true;
866 case MVT::f128:
867 return Subtarget.hasVectorEnhancements1();
868 default:
869 break;
870 }
871
872 return false;
873}
874
875// Return true if the constant can be generated with a vector instruction,
876// such as VGM, VGMB or VREPI.
878 const SystemZSubtarget &Subtarget) {
879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
880 if (!Subtarget.hasVector() ||
881 (isFP128 && !Subtarget.hasVectorEnhancements1()))
882 return false;
883
884 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
885 // preferred way of creating all-zero and all-one vectors so give it
886 // priority over other methods below.
887 unsigned Mask = 0;
888 unsigned I = 0;
889 for (; I < SystemZ::VectorBytes; ++I) {
890 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
891 if (Byte == 0xff)
892 Mask |= 1ULL << I;
893 else if (Byte != 0)
894 break;
895 }
896 if (I == SystemZ::VectorBytes) {
898 OpVals.push_back(Mask);
900 return true;
901 }
902
903 if (SplatBitSize > 64)
904 return false;
905
906 auto TryValue = [&](uint64_t Value) -> bool {
907 // Try VECTOR REPLICATE IMMEDIATE
908 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
909 if (isInt<16>(SignedValue)) {
910 OpVals.push_back(((unsigned) SignedValue));
913 SystemZ::VectorBits / SplatBitSize);
914 return true;
915 }
916 // Try VECTOR GENERATE MASK
917 unsigned Start, End;
918 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
919 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
920 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
921 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
922 OpVals.push_back(Start - (64 - SplatBitSize));
923 OpVals.push_back(End - (64 - SplatBitSize));
926 SystemZ::VectorBits / SplatBitSize);
927 return true;
928 }
929 return false;
930 };
931
932 // First try assuming that any undefined bits above the highest set bit
933 // and below the lowest set bit are 1s. This increases the likelihood of
934 // being able to use a sign-extended element value in VECTOR REPLICATE
935 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
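  // For example, a 32-bit splat whose defined bits are 0x0FFFFFF0 and whose
  // top nibble is undefined becomes 0xFFFFFFF0 (-16) under this assumption,
  // which fits the signed 16-bit immediate of VECTOR REPLICATE IMMEDIATE.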
936 uint64_t SplatBitsZ = SplatBits.getZExtValue();
937 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
938 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
939 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
940 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
941 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
942 if (TryValue(SplatBitsZ | Upper | Lower))
943 return true;
944
945 // Now try assuming that any undefined bits between the first and
946 // last defined set bits are set. This increases the chances of
947 // using a non-wraparound mask.
948 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
949 return TryValue(SplatBitsZ | Middle);
950}
951
953 if (IntImm.isSingleWord()) {
954 IntBits = APInt(128, IntImm.getZExtValue());
955 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
956 } else
957 IntBits = IntImm;
958 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
959
960 // Find the smallest splat.
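  // For example, a 128-bit immediate made of the 16-bit pattern 0x0001
  // repeated eight times reduces to SplatBits = 0x0001 and SplatBitSize = 16;
  // halving stops there because the two 8-bit halves (0x00 and 0x01) differ.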
961 SplatBits = IntImm;
962 unsigned Width = SplatBits.getBitWidth();
963 while (Width > 8) {
964 unsigned HalfSize = Width / 2;
965 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
966 APInt LowValue = SplatBits.trunc(HalfSize);
967
968 // If the two halves do not match, stop here.
969 if (HighValue != LowValue || 8 > HalfSize)
970 break;
971
972 SplatBits = HighValue;
973 Width = HalfSize;
974 }
975 SplatUndef = 0;
976 SplatBitSize = Width;
977}
978
980 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
981 bool HasAnyUndefs;
982
983 // Get IntBits by finding the 128 bit splat.
984 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
985 true);
986
987 // Get SplatBits by finding the 8 bit or greater splat.
988 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
989 true);
990}
991
993 bool ForCodeSize) const {
994 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
995 if (Imm.isZero() || Imm.isNegZero())
996 return true;
997
999}
1000
1003 MachineBasicBlock *MBB) const {
1004 DebugLoc DL = MI.getDebugLoc();
1005 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1006 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1007
1008 MachineFunction *MF = MBB->getParent();
1010
1011 const BasicBlock *BB = MBB->getBasicBlock();
1012 MachineFunction::iterator I = ++MBB->getIterator();
1013
1014 Register DstReg = MI.getOperand(0).getReg();
1015 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1016 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1017 (void)TRI;
1018 Register MainDstReg = MRI.createVirtualRegister(RC);
1019 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1020
1021 MVT PVT = getPointerTy(MF->getDataLayout());
1022 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1023 // For v = setjmp(buf), we generate.
1024 // Algorithm:
1025 //
1026 // ---------
1027 // | thisMBB |
1028 // ---------
1029 // |
1030 // ------------------------
1031 // | |
1032 // ---------- ---------------
1033 // | mainMBB | | restoreMBB |
1034 // | v = 0 | | v = 1 |
1035 // ---------- ---------------
1036 // | |
1037 // -------------------------
1038 // |
1039 // -----------------------------
1040 // | sinkMBB |
1041 // | phi(v_mainMBB,v_restoreMBB) |
1042 // -----------------------------
1043 // thisMBB:
1044 // buf[FPOffset] = Frame Pointer if hasFP.
1045 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1046 // buf[BCOffset] = Backchain value if building with -mbackchain.
1047 // buf[SPOffset] = Stack Pointer.
1048 // buf[LPOffset] = We never write this slot; gcc always stores R13 here.
1049 // SjLjSetup restoreMBB
1050 // mainMBB:
1051 // v_main = 0
1052 // sinkMBB:
1053 // v = phi(v_main, v_restore)
1054 // restoreMBB:
1055 // v_restore = 1
1056
1057 MachineBasicBlock *ThisMBB = MBB;
1058 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1059 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1060 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1061
1062 MF->insert(I, MainMBB);
1063 MF->insert(I, SinkMBB);
1064 MF->push_back(RestoreMBB);
1065 RestoreMBB->setMachineBlockAddressTaken();
1066
1068
1069 // Transfer the remainder of BB and its successor edges to sinkMBB.
1070 SinkMBB->splice(SinkMBB->begin(), MBB,
1071 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1073
1074 // thisMBB:
1075 const int64_t FPOffset = 0; // Slot 1.
1076 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1077 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1078 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1079
1080 // Buf address.
1081 Register BufReg = MI.getOperand(1).getReg();
1082
1083 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1084 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1085
1086 // Prepare IP for longjmp.
1087 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1088 .addMBB(RestoreMBB);
1089 // Store IP for return from jmp, slot 2, offset = 1.
1090 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1091 .addReg(LabelReg)
1092 .addReg(BufReg)
1093 .addImm(LabelOffset)
1094 .addReg(0);
1095
1096 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1097 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1098 if (HasFP) {
1099 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1100 .addReg(SpecialRegs->getFramePointerRegister())
1101 .addReg(BufReg)
1102 .addImm(FPOffset)
1103 .addReg(0);
1104 }
1105
1106 // Store SP.
1107 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1108 .addReg(SpecialRegs->getStackPointerRegister())
1109 .addReg(BufReg)
1110 .addImm(SPOffset)
1111 .addReg(0);
1112
1113 // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1114 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1115 if (BackChain) {
1116 Register BCReg = MRI.createVirtualRegister(PtrRC);
1117 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1118 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1119 .addReg(SpecialRegs->getStackPointerRegister())
1120 .addImm(TFL->getBackchainOffset(*MF))
1121 .addReg(0);
1122
1123 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1124 .addReg(BCReg)
1125 .addReg(BufReg)
1126 .addImm(BCOffset)
1127 .addReg(0);
1128 }
1129
1130 // Setup.
1131 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1132 .addMBB(RestoreMBB);
1133
1134 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1135 MIB.addRegMask(RegInfo->getNoPreservedMask());
1136
1137 ThisMBB->addSuccessor(MainMBB);
1138 ThisMBB->addSuccessor(RestoreMBB);
1139
1140 // mainMBB:
1141 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1142 MainMBB->addSuccessor(SinkMBB);
1143
1144 // sinkMBB:
1145 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1146 .addReg(MainDstReg)
1147 .addMBB(MainMBB)
1148 .addReg(RestoreDstReg)
1149 .addMBB(RestoreMBB);
1150
1151 // restoreMBB.
1152 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1153 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1154 RestoreMBB->addSuccessor(SinkMBB);
1155
1156 MI.eraseFromParent();
1157
1158 return SinkMBB;
1159}
1160
1163 MachineBasicBlock *MBB) const {
1164
1165 DebugLoc DL = MI.getDebugLoc();
1166 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1167
1168 MachineFunction *MF = MBB->getParent();
1170
1171 MVT PVT = getPointerTy(MF->getDataLayout());
1172 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1173 Register BufReg = MI.getOperand(0).getReg();
1174 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1175 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1176
1177 Register Tmp = MRI.createVirtualRegister(RC);
1178 Register BCReg = MRI.createVirtualRegister(RC);
1179
1181
1182 const int64_t FPOffset = 0;
1183 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1184 const int64_t BCOffset = 2 * PVT.getStoreSize();
1185 const int64_t SPOffset = 3 * PVT.getStoreSize();
1186 const int64_t LPOffset = 4 * PVT.getStoreSize();
1187
1188 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1189 .addReg(BufReg)
1190 .addImm(LabelOffset)
1191 .addReg(0);
1192
1193 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1194 SpecialRegs->getFramePointerRegister())
1195 .addReg(BufReg)
1196 .addImm(FPOffset)
1197 .addReg(0);
1198
1199 // We are restoring R13 even though we never stored it in setjmp from llvm,
1200 // as gcc always stores R13 in builtin_setjmp. We could be running mixed
1201 // code: gcc setjmp together with llvm longjmp.
1202 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1203 .addReg(BufReg)
1204 .addImm(LPOffset)
1205 .addReg(0);
1206
1207 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1208 if (BackChain) {
1209 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1210 .addReg(BufReg)
1211 .addImm(BCOffset)
1212 .addReg(0);
1213 }
1214
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1216 SpecialRegs->getStackPointerRegister())
1217 .addReg(BufReg)
1218 .addImm(SPOffset)
1219 .addReg(0);
1220
1221 if (BackChain) {
1222 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1223 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1224 .addReg(BCReg)
1225 .addReg(SpecialRegs->getStackPointerRegister())
1226 .addImm(TFL->getBackchainOffset(*MF))
1227 .addReg(0);
1228 }
1229
1230 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1231
1232 MI.eraseFromParent();
1233 return MBB;
1234}
1235
1236/// Returns true if stack probing through inline assembly is requested.
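/// Clang attaches this attribute ("probe-stack"="inline-asm") when compiling
/// with -fstack-clash-protection, for example.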
1238 // If the function specifically requests inline stack probes, emit them.
1239 if (MF.getFunction().hasFnAttribute("probe-stack"))
1240 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1241 "inline-asm";
1242 return false;
1243}
1244
1249
1254
1257 // Don't expand subword operations as they require special treatment.
1258 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1260
1261 // Don't expand if there is a target instruction available.
1262 if (Subtarget.hasInterlockedAccess1() &&
1263 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1270
1272}
1273
1275 // We can use CGFI or CLGFI.
1276 return isInt<32>(Imm) || isUInt<32>(Imm);
1277}
1278
1280 // We can use ALGFI or SLGFI.
1281 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1282}
1283
1285 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1286 // Unaligned accesses should never be slower than the expanded version.
1287 // We check specifically for aligned accesses in the few cases where
1288 // they are required.
1289 if (Fast)
1290 *Fast = 1;
1291 return true;
1292}
1293
1295 EVT VT = Y.getValueType();
1296
1297 // We can use NC(G)RK for types in GPRs ...
1298 if (VT == MVT::i32 || VT == MVT::i64)
1299 return Subtarget.hasMiscellaneousExtensions3();
1300
1301 // ... or VNC for types in VRs.
1302 if (VT.isVector() || VT == MVT::i128)
1303 return Subtarget.hasVector();
1304
1305 return false;
1306}
1307
1308// Information about the addressing mode for a memory access.
1310 // True if a long displacement is supported.
1312
1313 // True if use of index register is supported.
1315
1316 AddressingMode(bool LongDispl, bool IdxReg) :
1317 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1318};
1319
1320// Return the desired addressing mode for a Load whose only use (in the
1321// same block) is a Store.
1323 Type *Ty) {
1324 // With vector support a Load->Store combination may be combined to either
1325 // an MVC or vector operations and it seems to work best to allow the
1326 // vector addressing mode.
1327 if (HasVector)
1328 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1329
1330 // Otherwise only the MVC case is special.
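  // MVC addresses each operand with only a base register and a 12-bit
  // unsigned displacement, so for an i8 load->store pair (a likely MVC)
  // neither long displacements nor an index register are useful.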
1331 bool MVC = Ty->isIntegerTy(8);
1332 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1333}
1334
1335// Return the addressing mode which seems most desirable given an LLVM
1336// Instruction pointer.
1337static AddressingMode
1340 switch (II->getIntrinsicID()) {
1341 default: break;
1342 case Intrinsic::memset:
1343 case Intrinsic::memmove:
1344 case Intrinsic::memcpy:
1345 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1346 }
1347 }
1348
1349 if (isa<LoadInst>(I) && I->hasOneUse()) {
1350 auto *SingleUser = cast<Instruction>(*I->user_begin());
1351 if (SingleUser->getParent() == I->getParent()) {
1352 if (isa<ICmpInst>(SingleUser)) {
1353 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1354 if (C->getBitWidth() <= 64 &&
1355 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1356 // Comparison of memory with 16 bit signed / unsigned immediate
1357 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1358 } else if (isa<StoreInst>(SingleUser))
1359 // Load->Store
1360 return getLoadStoreAddrMode(HasVector, I->getType());
1361 }
1362 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1363 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1364 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1365 // Load->Store
1366 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1367 }
1368
1369 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1370
1371 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1372 // dependencies (LDE only supports small offsets).
1373 // * Utilize the vector registers to hold floating point
1374 // values (vector load / store instructions only support small
1375 // offsets).
1376
1377 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1378 I->getOperand(0)->getType());
1379 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1380 bool IsVectorAccess = MemAccessTy->isVectorTy();
1381
1382 // A store of an extracted vector element will be combined into a VSTE type
1383 // instruction.
1384 if (!IsVectorAccess && isa<StoreInst>(I)) {
1385 Value *DataOp = I->getOperand(0);
1386 if (isa<ExtractElementInst>(DataOp))
1387 IsVectorAccess = true;
1388 }
1389
1390 // A load which gets inserted into a vector element will be combined into a
1391 // VLE type instruction.
1392 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1393 User *LoadUser = *I->user_begin();
1394 if (isa<InsertElementInst>(LoadUser))
1395 IsVectorAccess = true;
1396 }
1397
1398 if (IsFPAccess || IsVectorAccess)
1399 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1400 }
1401
1402 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1403}
1404
1406 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1407 // Punt on globals for now, although they can be used in limited
1408 // RELATIVE LONG cases.
1409 if (AM.BaseGV)
1410 return false;
1411
1412 // Require a 20-bit signed offset.
1413 if (!isInt<20>(AM.BaseOffs))
1414 return false;
1415
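  // A SystemZ address has the form D(X,B): a 12-bit unsigned or 20-bit signed
  // displacement D, an optional unscaled index register X, and a base
  // register B; scaled indexing does not exist, hence the Scale checks below.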
1416 bool RequireD12 =
1417 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1418 AddressingMode SupportedAM(!RequireD12, true);
1419 if (I != nullptr)
1420 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1421
1422 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1423 return false;
1424
1425 if (!SupportedAM.IndexReg)
1426 // No indexing allowed.
1427 return AM.Scale == 0;
1428 else
1429 // Indexing is OK but no scale factor can be applied.
1430 return AM.Scale == 0 || AM.Scale == 1;
1431}
1432
1434 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1435 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1436 const AttributeList &FuncAttributes) const {
1437 const int MVCFastLen = 16;
1438
1439 if (Limit != ~unsigned(0)) {
1440 // Don't expand Op into scalar loads/stores in these cases:
1441 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1442 return false; // Small memcpy: Use MVC
1443 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1444 return false; // Small memset (first byte with STC/MVI): Use MVC
1445 if (Op.isZeroMemset())
1446 return false; // Memset zero: Use XC
1447 }
1448
1449 return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
1450 DstAS, SrcAS, FuncAttributes);
1451}
1452
1454 LLVMContext &Context, const MemOp &Op,
1455 const AttributeList &FuncAttributes) const {
1456 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1457}
1458
1459bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1460 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1461 return false;
1462 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1463 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1464 return FromBits > ToBits;
1465}
1466
1468 if (!FromVT.isInteger() || !ToVT.isInteger())
1469 return false;
1470 unsigned FromBits = FromVT.getFixedSizeInBits();
1471 unsigned ToBits = ToVT.getFixedSizeInBits();
1472 return FromBits > ToBits;
1473}
1474
1475//===----------------------------------------------------------------------===//
1476// Inline asm support
1477//===----------------------------------------------------------------------===//
1478
1481 if (Constraint.size() == 1) {
1482 switch (Constraint[0]) {
1483 case 'a': // Address register
1484 case 'd': // Data register (equivalent to 'r')
1485 case 'f': // Floating-point register
1486 case 'h': // High-part register
1487 case 'r': // General-purpose register
1488 case 'v': // Vector register
1489 return C_RegisterClass;
1490
1491 case 'Q': // Memory with base and unsigned 12-bit displacement
1492 case 'R': // Likewise, plus an index
1493 case 'S': // Memory with base and signed 20-bit displacement
1494 case 'T': // Likewise, plus an index
1495 case 'm': // Equivalent to 'T'.
1496 return C_Memory;
1497
1498 case 'I': // Unsigned 8-bit constant
1499 case 'J': // Unsigned 12-bit constant
1500 case 'K': // Signed 16-bit constant
1501 case 'L': // Signed 20-bit displacement (on all targets we support)
1502 case 'M': // 0x7fffffff
1503 return C_Immediate;
1504
1505 default:
1506 break;
1507 }
1508 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1509 switch (Constraint[1]) {
1510 case 'Q': // Address with base and unsigned 12-bit displacement
1511 case 'R': // Likewise, plus an index
1512 case 'S': // Address with base and signed 20-bit displacement
1513 case 'T': // Likewise, plus an index
1514 return C_Address;
1515
1516 default:
1517 break;
1518 }
1519 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1520 if (StringRef("{@cc}").compare(Constraint) == 0)
1521 return C_Other;
1522 }
1523 return TargetLowering::getConstraintType(Constraint);
1524}
1525
1528 AsmOperandInfo &Info, const char *Constraint) const {
1530 Value *CallOperandVal = Info.CallOperandVal;
1531 // If we don't have a value, we can't do a match,
1532 // but allow it at the lowest weight.
1533 if (!CallOperandVal)
1534 return CW_Default;
1535 Type *type = CallOperandVal->getType();
1536 // Look at the constraint type.
1537 switch (*Constraint) {
1538 default:
1539 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1540 break;
1541
1542 case 'a': // Address register
1543 case 'd': // Data register (equivalent to 'r')
1544 case 'h': // High-part register
1545 case 'r': // General-purpose register
1546 Weight =
1547 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1548 break;
1549
1550 case 'f': // Floating-point register
1551 if (!useSoftFloat())
1552 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1553 break;
1554
1555 case 'v': // Vector register
1556 if (Subtarget.hasVector())
1557 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1558 : CW_Default;
1559 break;
1560
1561 case 'I': // Unsigned 8-bit constant
1562 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1563 if (isUInt<8>(C->getZExtValue()))
1564 Weight = CW_Constant;
1565 break;
1566
1567 case 'J': // Unsigned 12-bit constant
1568 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1569 if (isUInt<12>(C->getZExtValue()))
1570 Weight = CW_Constant;
1571 break;
1572
1573 case 'K': // Signed 16-bit constant
1574 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1575 if (isInt<16>(C->getSExtValue()))
1576 Weight = CW_Constant;
1577 break;
1578
1579 case 'L': // Signed 20-bit displacement (on all targets we support)
1580 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1581 if (isInt<20>(C->getSExtValue()))
1582 Weight = CW_Constant;
1583 break;
1584
1585 case 'M': // 0x7fffffff
1586 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1587 if (C->getZExtValue() == 0x7fffffff)
1588 Weight = CW_Constant;
1589 break;
1590 }
1591 return Weight;
1592}
1593
1594// Parse a "{tNNN}" register constraint for which the register type "t"
1595// has already been verified. RC is the register class associated with "t" and
1596// Map maps 0-based register numbers to LLVM register numbers.
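// For example, the constraint "{r5}" with a 64-bit operand type yields
// index 5, which the GR64 map translates to SystemZ::R5D.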
1597static std::pair<unsigned, const TargetRegisterClass *>
1599 const unsigned *Map, unsigned Size) {
1600 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1601 if (isdigit(Constraint[2])) {
1602 unsigned Index;
1603 bool Failed =
1604 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1605 if (!Failed && Index < Size && Map[Index])
1606 return std::make_pair(Map[Index], RC);
1607 }
1608 return std::make_pair(0U, nullptr);
1609}
1610
1611std::pair<unsigned, const TargetRegisterClass *>
1613 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1614 if (Constraint.size() == 1) {
1615 // GCC Constraint Letters
1616 switch (Constraint[0]) {
1617 default: break;
1618 case 'd': // Data register (equivalent to 'r')
1619 case 'r': // General-purpose register
1620 if (VT.getSizeInBits() == 64)
1621 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1622 else if (VT.getSizeInBits() == 128)
1623 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1624 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1625
1626 case 'a': // Address register
1627 if (VT == MVT::i64)
1628 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1629 else if (VT == MVT::i128)
1630 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1631 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1632
1633 case 'h': // High-part register (an LLVM extension)
1634 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1635
1636 case 'f': // Floating-point register
1637 if (!useSoftFloat()) {
1638 if (VT.getSizeInBits() == 16)
1639 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1640 else if (VT.getSizeInBits() == 64)
1641 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1642 else if (VT.getSizeInBits() == 128)
1643 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1644 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1645 }
1646 break;
1647
1648 case 'v': // Vector register
1649 if (Subtarget.hasVector()) {
1650 if (VT.getSizeInBits() == 16)
1651 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1652 if (VT.getSizeInBits() == 32)
1653 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1654 if (VT.getSizeInBits() == 64)
1655 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1656 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1657 }
1658 break;
1659 }
1660 }
1661 if (Constraint.starts_with("{")) {
1662
1663 // A clobber constraint (e.g. ~{f0}) will have MVT::Other, for which it is
1664 // not legal to query the size.
1665 auto getVTSizeInBits = [&VT]() {
1666 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1667 };
1668
1669 // We need to override the default register parsing for GPRs and FPRs
1670 // because the interpretation depends on VT. The internal names of
1671 // the registers are also different from the external names
1672 // (F0D and F0S instead of F0, etc.).
1673 if (Constraint[1] == 'r') {
1674 if (getVTSizeInBits() == 32)
1675 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1677 if (getVTSizeInBits() == 128)
1678 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1680 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1682 }
1683 if (Constraint[1] == 'f') {
1684 if (useSoftFloat())
1685 return std::make_pair(
1686 0u, static_cast<const TargetRegisterClass *>(nullptr));
1687 if (getVTSizeInBits() == 16)
1688 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1690 if (getVTSizeInBits() == 32)
1691 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1693 if (getVTSizeInBits() == 128)
1694 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1696 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1698 }
1699 if (Constraint[1] == 'v') {
1700 if (!Subtarget.hasVector())
1701 return std::make_pair(
1702 0u, static_cast<const TargetRegisterClass *>(nullptr));
1703 if (getVTSizeInBits() == 16)
1704 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1706 if (getVTSizeInBits() == 32)
1707 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1709 if (getVTSizeInBits() == 64)
1710 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1712 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1714 }
1715 if (Constraint[1] == '@') {
1716 if (StringRef("{@cc}").compare(Constraint) == 0)
1717 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1718 }
1719 }
1720 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1721}
1722
1723// FIXME? Maybe this could be a TableGen attribute on some registers and
1724// this table could be generated automatically from RegInfo.
1727 const MachineFunction &MF) const {
1728 Register Reg =
1730 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1731 : SystemZ::NoRegister)
1732 .Case("r15",
1733 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1734 .Default(Register());
1735
1736 return Reg;
1737}
1738
1740 const Constant *PersonalityFn) const {
1741 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1742}
1743
1745 const Constant *PersonalityFn) const {
1746 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1747}
1748
1749// Convert condition code in CCReg to an i32 value.
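// IPM inserts the condition code at bit position SystemZ::IPM_CC (28) of its
// result, so shifting right by 28 leaves the CC in the low two bits.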
1751 SDLoc DL(CCReg);
1752 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1753 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1754 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1755}
1756
1757// Lower @cc targets via setcc.
1759 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1760 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1761 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1762 return SDValue();
1763
1764 // Check that return type is valid.
1765 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1766 OpInfo.ConstraintVT.getSizeInBits() < 8)
1767 report_fatal_error("Glue output operand is of invalid type");
1768
1769 if (Glue.getNode()) {
1770 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1771 Chain = Glue.getValue(1);
1772 } else
1773 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1774 return getCCResult(DAG, Glue);
1775}
1776
1778 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1779 SelectionDAG &DAG) const {
1780 // Only support length 1 constraints for now.
1781 if (Constraint.size() == 1) {
1782 switch (Constraint[0]) {
1783 case 'I': // Unsigned 8-bit constant
1784 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1785 if (isUInt<8>(C->getZExtValue()))
1786 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1787 Op.getValueType()));
1788 return;
1789
1790 case 'J': // Unsigned 12-bit constant
1791 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1792 if (isUInt<12>(C->getZExtValue()))
1793 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1794 Op.getValueType()));
1795 return;
1796
1797 case 'K': // Signed 16-bit constant
1798 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1799 if (isInt<16>(C->getSExtValue()))
1800 Ops.push_back(DAG.getSignedTargetConstant(
1801 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1802 return;
1803
1804 case 'L': // Signed 20-bit displacement (on all targets we support)
1805 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1806 if (isInt<20>(C->getSExtValue()))
1807 Ops.push_back(DAG.getSignedTargetConstant(
1808 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1809 return;
1810
1811 case 'M': // 0x7fffffff
1812 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1813 if (C->getZExtValue() == 0x7fffffff)
1814 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1815 Op.getValueType()));
1816 return;
1817 }
1818 }
1820}
1821
1822//===----------------------------------------------------------------------===//
1823// Calling conventions
1824//===----------------------------------------------------------------------===//
1825
1826#include "SystemZGenCallingConv.inc"
1827
1829 CallingConv::ID) const {
1830 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1831 SystemZ::R14D, 0 };
1832 return ScratchRegs;
1833}
1834
1836 Type *ToType) const {
1837 return isTruncateFree(FromType, ToType);
1838}
1839
1841 return CI->isTailCall();
1842}
1843
1844// Value is a value that has been passed to us in the location described by VA
1845// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1846// any loads onto Chain.
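// For example, an integer argument the caller sign- or zero-extended from i32
// to i64 arrives here as an i64; it is tagged with AssertSext/AssertZext and
// then truncated back to i32.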
1848 CCValAssign &VA, SDValue Chain,
1849 SDValue Value) {
1850 // If the argument has been promoted from a smaller type, insert an
1851 // assertion to capture this.
1852 if (VA.getLocInfo() == CCValAssign::SExt)
1854 DAG.getValueType(VA.getValVT()));
1855 else if (VA.getLocInfo() == CCValAssign::ZExt)
1857 DAG.getValueType(VA.getValVT()));
1858
1859 if (VA.isExtInLoc())
1860 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1861 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1862 // If this is a short vector argument loaded from the stack,
1863 // extend from i64 to full vector size and then bitcast.
1864 assert(VA.getLocVT() == MVT::i64);
1865 assert(VA.getValVT().isVector());
1866 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1867 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1868 } else
1869 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1870 return Value;
1871}
1872
1873// Value is a value of type VA.getValVT() that we need to copy into
1874// the location described by VA. Return a copy of Value converted to
1875// VA.getLocVT(). The caller is responsible for handling indirect values.
1877 CCValAssign &VA, SDValue Value) {
1878 switch (VA.getLocInfo()) {
1879 case CCValAssign::SExt:
1880 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1881 case CCValAssign::ZExt:
1882 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1883 case CCValAssign::AExt:
1884 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1885 case CCValAssign::BCvt: {
1886 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1887 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1888 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1889 // For an f32 vararg we need to first promote it to an f64 and then
1890 // bitcast it to an i64.
1891 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1892 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1893 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1894 ? MVT::v2i64
1895 : VA.getLocVT();
1896 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1897 // For ELF, this is a short vector argument to be stored to the stack,
1898 // bitcast to v2i64 and then extract first element.
1899 if (BitCastToType == MVT::v2i64)
1900 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1901 DAG.getConstant(0, DL, MVT::i32));
1902 return Value;
1903 }
1904 case CCValAssign::Full:
1905 return Value;
1906 default:
1907 llvm_unreachable("Unhandled getLocInfo()");
1908 }
1909}
1910
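// Convert an i128 value into the Untyped form used for a 128-bit GPR pair,
// built with the PAIR128 pseudo; used e.g. for inline-asm operands.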
1912 SDLoc DL(In);
1913 SDValue Lo, Hi;
1914 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1915 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1916 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1917 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1918 DAG.getConstant(64, DL, MVT::i32)));
1919 } else {
1920 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1921 }
1922
1923 // FIXME: If v2i64 were a legal type, we could use it instead of
1924 // Untyped here. This might enable improved folding.
1925 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1926 MVT::Untyped, Hi, Lo);
1927 return SDValue(Pair, 0);
1928}
1929
1931 SDLoc DL(In);
1932 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1933 DL, MVT::i64, In);
1934 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1935 DL, MVT::i64, In);
1936
1937 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1938 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1939 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1940 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1941 DAG.getConstant(64, DL, MVT::i32));
1942 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1943 } else {
1944 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1945 }
1946}
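// Worked example (informal): for a GR128 pair whose subreg_h64 holds
// Hi = 0x0123456789ABCDEF and whose subreg_l64 holds Lo = 0xFEDCBA9876543210,
// the value reconstructed above is (Hi << 64) | Lo, i.e.
// 0x0123456789ABCDEFFEDCBA9876543210, exactly the Hi/Lo split that
// lowerI128ToGR128 performs in the other direction.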
1947
1948 bool SystemZTargetLowering::splitValueIntoRegisterParts(
1949 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1950 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1951 EVT ValueVT = Val.getValueType();
1952 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1953 // Inline assembly operand.
1954 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1955 return true;
1956 }
1957
1958 return false;
1959}
1960
1961 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1962 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1963 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1964 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1965 // Inline assembly operand.
1966 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1967 return DAG.getBitcast(ValueVT, Res);
1968 }
1969
1970 return SDValue();
1971}
1972
1973 SDValue SystemZTargetLowering::LowerFormalArguments(
1974 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1975 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1976 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1977 MachineFunction &MF = DAG.getMachineFunction();
1978 MachineFrameInfo &MFI = MF.getFrameInfo();
1979 MachineRegisterInfo &MRI = MF.getRegInfo();
1980 SystemZMachineFunctionInfo *FuncInfo =
1981 MF.getInfo<SystemZMachineFunctionInfo>();
1982 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1983 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1984
1985 // Assign locations to all of the incoming arguments.
1986 SmallVector<CCValAssign, 16> ArgLocs;
1987 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1988 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1989 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1990
1991 unsigned NumFixedGPRs = 0;
1992 unsigned NumFixedFPRs = 0;
1993 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1994 SDValue ArgValue;
1995 CCValAssign &VA = ArgLocs[I];
1996 EVT LocVT = VA.getLocVT();
1997 if (VA.isRegLoc()) {
1998 // Arguments passed in registers
1999 const TargetRegisterClass *RC;
2000 switch (LocVT.getSimpleVT().SimpleTy) {
2001 default:
2002 // Integers smaller than i64 should be promoted to i64.
2003 llvm_unreachable("Unexpected argument type");
2004 case MVT::i32:
2005 NumFixedGPRs += 1;
2006 RC = &SystemZ::GR32BitRegClass;
2007 break;
2008 case MVT::i64:
2009 NumFixedGPRs += 1;
2010 RC = &SystemZ::GR64BitRegClass;
2011 break;
2012 case MVT::f16:
2013 NumFixedFPRs += 1;
2014 RC = &SystemZ::FP16BitRegClass;
2015 break;
2016 case MVT::f32:
2017 NumFixedFPRs += 1;
2018 RC = &SystemZ::FP32BitRegClass;
2019 break;
2020 case MVT::f64:
2021 NumFixedFPRs += 1;
2022 RC = &SystemZ::FP64BitRegClass;
2023 break;
2024 case MVT::f128:
2025 NumFixedFPRs += 2;
2026 RC = &SystemZ::FP128BitRegClass;
2027 break;
2028 case MVT::v16i8:
2029 case MVT::v8i16:
2030 case MVT::v4i32:
2031 case MVT::v2i64:
2032 case MVT::v4f32:
2033 case MVT::v2f64:
2034 RC = &SystemZ::VR128BitRegClass;
2035 break;
2036 }
2037
2038 Register VReg = MRI.createVirtualRegister(RC);
2039 MRI.addLiveIn(VA.getLocReg(), VReg);
2040 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2041 } else {
2042 assert(VA.isMemLoc() && "Argument not register or memory");
2043
2044 // Create the frame index object for this incoming parameter.
2045 // FIXME: Pre-include call frame size in the offset, should not
2046 // need to manually add it here.
2047 int64_t ArgSPOffset = VA.getLocMemOffset();
2048 if (Subtarget.isTargetXPLINK64()) {
2049 auto &XPRegs =
2050 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2051 ArgSPOffset += XPRegs.getCallFrameSize();
2052 }
2053 int FI =
2054 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2055
2056 // Create the SelectionDAG nodes corresponding to a load
2057 // from this parameter. Unpromoted ints and floats are
2058 // passed as right-justified 8-byte values.
2059 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2060 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2061 VA.getLocVT() == MVT::f16) {
2062 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2063 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2064 DAG.getIntPtrConstant(SlotOffs, DL));
2065 }
2066 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2067 MachinePointerInfo::getFixedStack(MF, FI));
2068 }
2069
2070 // Convert the value of the argument register into the value that's
2071 // being passed.
2072 if (VA.getLocInfo() == CCValAssign::Indirect) {
2073 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2074 MachinePointerInfo()));
2075 // If the original argument was split (e.g. i128), we need
2076 // to load all parts of it here (using the same address).
2077 unsigned ArgIndex = Ins[I].OrigArgIndex;
2078 assert (Ins[I].PartOffset == 0);
2079 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
2080 CCValAssign &PartVA = ArgLocs[I + 1];
2081 unsigned PartOffset = Ins[I + 1].PartOffset;
2082 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2083 DAG.getIntPtrConstant(PartOffset, DL));
2084 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2085 MachinePointerInfo()));
2086 ++I;
2087 }
2088 } else
2089 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2090 }
2091
2092 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2093 // Save the number of non-varargs registers for later use by va_start, etc.
2094 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2095 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2096
2097 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2098 Subtarget.getSpecialRegisters());
2099
2100 // Likewise the address (in the form of a frame index) of where the
2101 // first stack vararg would be. The 1-byte size here is arbitrary.
2102 // FIXME: Pre-include call frame size in the offset, should not
2103 // need to manually add it here.
2104 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2105 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2106 FuncInfo->setVarArgsFrameIndex(FI);
2107 }
2108
2109 if (IsVarArg && Subtarget.isTargetELF()) {
2110 // Save the number of non-varargs registers for later use by va_start, etc.
2111 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2112 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2113
2114 // Likewise the address (in the form of a frame index) of where the
2115 // first stack vararg would be. The 1-byte size here is arbitrary.
2116 int64_t VarArgsOffset = CCInfo.getStackSize();
2117 FuncInfo->setVarArgsFrameIndex(
2118 MFI.CreateFixedObject(1, VarArgsOffset, true));
2119
2120 // ...and a similar frame index for the caller-allocated save area
2121 // that will be used to store the incoming registers.
2122 int64_t RegSaveOffset =
2123 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2124 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2125 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2126
2127 // Store the FPR varargs in the reserved frame slots. (We store the
2128 // GPRs as part of the prologue.)
2129 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2130 SDValue MemOps[SystemZ::ELFNumArgFPRs];
2131 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2132 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2133 int FI =
2134 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
2135 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2136 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
2137 &SystemZ::FP64BitRegClass);
2138 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2139 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2140 MachinePointerInfo::getFixedStack(MF, FI));
2141 }
2142 // Join the stores, which are independent of one another.
2143 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2144 ArrayRef(&MemOps[NumFixedFPRs],
2145 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2146 }
2147 }
2148
2149 if (Subtarget.isTargetXPLINK64()) {
2150 // Create a virtual register for handling the incoming "ADA" special register (R5).
2151 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2152 Register ADAvReg = MRI.createVirtualRegister(RC);
2153 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2154 Subtarget.getSpecialRegisters());
2155 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2156 FuncInfo->setADAVirtualRegister(ADAvReg);
2157 }
2158 return Chain;
2159}
2160
2161 static bool canUseSiblingCall(const CCState &ArgCCInfo,
2162 SmallVectorImpl<CCValAssign> &ArgLocs,
2163 SmallVectorImpl<ISD::OutputArg> &Outs) {
2164 // Punt if there are any indirect or stack arguments, or if the call
2165 // needs the callee-saved argument register R6, or if the call uses
2166 // the callee-saved register arguments SwiftSelf and SwiftError.
2167 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2168 CCValAssign &VA = ArgLocs[I];
2169 if (VA.getLocInfo() == CCValAssign::Indirect)
2170 return false;
2171 if (!VA.isRegLoc())
2172 return false;
2173 Register Reg = VA.getLocReg();
2174 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2175 return false;
2176 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2177 return false;
2178 }
2179 return true;
2180}
2181
2182 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2183 unsigned Offset, bool LoadAdr = false) {
2184 MachineFunction &MF = DAG.getMachineFunction();
2185 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2186 Register ADAvReg = MFI->getADAVirtualRegister();
2187 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2188
2189 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2190 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2191
2192 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2193 if (!LoadAdr)
2194 Result = DAG.getLoad(
2195 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2196 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
2197
2198 return Result;
2199}
2200
2201 // ADA access using a GlobalValue.
2202 // Note: for functions, the address of the descriptor is returned.
2203 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2204 EVT PtrVT) {
2205 unsigned ADAtype;
2206 bool LoadAddr = false;
2207 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2208 bool IsFunction =
2209 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2210 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2211
2212 if (IsFunction) {
2213 if (IsInternal) {
2215 LoadAddr = true;
2216 } else
2218 } else {
2220 }
2221 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2222
2223 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2224}
2225
2226static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2227 SDLoc &DL, SDValue &Chain) {
2228 unsigned ADADelta = 0; // ADA offset in desc.
2229 unsigned EPADelta = 8; // EPA offset in desc.
2230 MachineFunction &MF = DAG.getMachineFunction();
2231 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2232
2233 // XPLink calling convention.
2234 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2235 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2236 G->getGlobal()->hasPrivateLinkage());
2237 if (IsInternal) {
2238 SystemZMachineFunctionInfo *MFI =
2239 MF.getInfo<SystemZMachineFunctionInfo>();
2240 Register ADAvReg = MFI->getADAVirtualRegister();
2241 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2242 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2243 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2244 return true;
2245 } else {
2246 SDValue GA = DAG.getTargetGlobalAddress(
2247 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2248 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2249 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2250 }
2251 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2252 SDValue ES = DAG.getTargetExternalSymbol(
2253 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2254 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2255 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2256 } else {
2257 // Function pointer case
2258 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2259 DAG.getConstant(ADADelta, DL, PtrVT));
2260 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2262 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2263 DAG.getConstant(EPADelta, DL, PtrVT));
2264 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2266 }
2267 return false;
2268}
2269
2270SDValue
2271 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2272 SmallVectorImpl<SDValue> &InVals) const {
2273 SelectionDAG &DAG = CLI.DAG;
2274 SDLoc &DL = CLI.DL;
2275 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2276 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2277 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2278 SDValue Chain = CLI.Chain;
2279 SDValue Callee = CLI.Callee;
2280 bool &IsTailCall = CLI.IsTailCall;
2281 CallingConv::ID CallConv = CLI.CallConv;
2282 bool IsVarArg = CLI.IsVarArg;
2283 MachineFunction &MF = DAG.getMachineFunction();
2284 EVT PtrVT = getPointerTy(MF.getDataLayout());
2285 LLVMContext &Ctx = *DAG.getContext();
2286 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2287
2288 // FIXME: z/OS support to be added in later.
2289 if (Subtarget.isTargetXPLINK64())
2290 IsTailCall = false;
2291
2292 // Integer args <=32 bits should have an extension attribute.
2293 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2294
2295 // Analyze the operands of the call, assigning locations to each operand.
2296 SmallVector<CCValAssign, 16> ArgLocs;
2297 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2298 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2299
2300 // We don't support GuaranteedTailCallOpt, only automatically-detected
2301 // sibling calls.
2302 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2303 IsTailCall = false;
2304
2305 // Get a count of how many bytes are to be pushed on the stack.
2306 unsigned NumBytes = ArgCCInfo.getStackSize();
2307
2308 // Mark the start of the call.
2309 if (!IsTailCall)
2310 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2311
2312 // Copy argument values to their designated locations.
2313 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2314 SmallVector<SDValue, 8> MemOpChains;
2315 SDValue StackPtr;
2316 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2317 CCValAssign &VA = ArgLocs[I];
2318 SDValue ArgValue = OutVals[I];
2319
2320 if (VA.getLocInfo() == CCValAssign::Indirect) {
2321 // Store the argument in a stack slot and pass its address.
2322 unsigned ArgIndex = Outs[I].OrigArgIndex;
2323 EVT SlotVT;
2324 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2325 // Allocate the full stack space for a promoted (and split) argument.
2326 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
2327 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
2328 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2329 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2330 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
2331 } else {
2332 SlotVT = Outs[I].VT;
2333 }
2334 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2335 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2336 MemOpChains.push_back(
2337 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2338 MachinePointerInfo::getFixedStack(MF, FI)));
2339 // If the original argument was split (e.g. i128), we need
2340 // to store all parts of it here (and pass just one address).
2341 assert (Outs[I].PartOffset == 0);
2342 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2343 SDValue PartValue = OutVals[I + 1];
2344 unsigned PartOffset = Outs[I + 1].PartOffset;
2345 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2346 DAG.getIntPtrConstant(PartOffset, DL));
2347 MemOpChains.push_back(
2348 DAG.getStore(Chain, DL, PartValue, Address,
2349 MachinePointerInfo::getFixedStack(MF, FI, PartOffset)));
2350 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2351 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2352 ++I;
2353 }
2354 ArgValue = SpillSlot;
2355 } else
2356 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2357
2358 if (VA.isRegLoc()) {
2359 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
2360 // MVT::i128 type. We decompose the 128-bit value into a pair of its high
2361 // and low halves.
2362 if (VA.getLocVT() == MVT::i128)
2363 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2364 // Queue up the argument copies and emit them at the end.
2365 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2366 } else {
2367 assert(VA.isMemLoc() && "Argument not register or memory");
2368
2369 // Work out the address of the stack slot. Unpromoted ints and
2370 // floats are passed as right-justified 8-byte values.
2371 if (!StackPtr.getNode())
2372 StackPtr = DAG.getCopyFromReg(Chain, DL,
2373 Regs->getStackPointerRegister(), PtrVT);
2374 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2375 VA.getLocMemOffset();
2376 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2377 Offset += 4;
2378 else if (VA.getLocVT() == MVT::f16)
2379 Offset += 6;
2380 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2381 DAG.getIntPtrConstant(Offset, DL));
2382
2383 // Emit the store.
2384 MemOpChains.push_back(
2385 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2386
2387 // Although long doubles or vectors are passed through the stack when
2388 // they are vararg (non-fixed arguments), if a long double or vector
2389 // occupies the third and fourth slot of the argument list GPR3 should
2390 // still shadow the third slot of the argument list.
2391 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2392 SDValue ShadowArgValue =
2393 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2394 DAG.getIntPtrConstant(1, DL));
2395 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2396 }
2397 }
2398 }
2399
2400 // Join the stores, which are independent of one another.
2401 if (!MemOpChains.empty())
2402 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2403
2404 // Accept direct calls by converting symbolic call addresses to the
2405 // associated Target* opcodes. Force %r1 to be used for indirect
2406 // tail calls.
2407 SDValue Glue;
2408
2409 if (Subtarget.isTargetXPLINK64()) {
2410 SDValue ADA;
2411 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2412 if (!IsBRASL) {
2413 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2414 ->getAddressOfCalleeRegister();
2415 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2416 Glue = Chain.getValue(1);
2417 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2418 }
2419 RegsToPass.push_back(std::make_pair(
2420 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2421 } else {
2422 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2423 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2424 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2425 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2426 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2427 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2428 } else if (IsTailCall) {
2429 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2430 Glue = Chain.getValue(1);
2431 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2432 }
2433 }
2434
2435 // Build a sequence of copy-to-reg nodes, chained and glued together.
2436 for (const auto &[Reg, N] : RegsToPass) {
2437 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2438 Glue = Chain.getValue(1);
2439 }
2440
2441 // The first call operand is the chain and the second is the target address.
2442 SmallVector<SDValue, 8> Ops;
2443 Ops.push_back(Chain);
2444 Ops.push_back(Callee);
2445
2446 // Add argument registers to the end of the list so that they are
2447 // known live into the call.
2448 for (const auto &[Reg, N] : RegsToPass)
2449 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2450
2451 // Add a register mask operand representing the call-preserved registers.
2452 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2453 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2454 assert(Mask && "Missing call preserved mask for calling convention");
2455 Ops.push_back(DAG.getRegisterMask(Mask));
2456
2457 // Glue the call to the argument copies, if any.
2458 if (Glue.getNode())
2459 Ops.push_back(Glue);
2460
2461 // Emit the call.
2462 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2463 if (IsTailCall) {
2464 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2465 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2466 return Ret;
2467 }
2468 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2469 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2470 Glue = Chain.getValue(1);
2471
2472 // Mark the end of the call, which is glued to the call itself.
2473 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2474 Glue = Chain.getValue(1);
2475
2476 // Assign locations to each value returned by this call.
2477 SmallVector<CCValAssign, 16> RetLocs;
2478 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2479 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2480
2481 // Copy all of the result registers out of their specified physreg.
2482 for (CCValAssign &VA : RetLocs) {
2483 // Copy the value out, gluing the copy to the end of the call sequence.
2484 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2485 VA.getLocVT(), Glue);
2486 Chain = RetValue.getValue(1);
2487 Glue = RetValue.getValue(2);
2488
2489 // Convert the value of the return register into the value that's
2490 // being returned.
2491 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2492 }
2493
2494 return Chain;
2495}
2496
2497// Generate a call taking the given operands as arguments and returning a
2498// result of type RetVT.
2499 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2500 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2501 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2502 bool DoesNotReturn, bool IsReturnValueUsed) const {
2503 TargetLowering::ArgListTy Args;
2504 Args.reserve(Ops.size());
2505
2506 for (SDValue Op : Ops) {
2507 ArgListEntry Entry(
2508 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2509 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2510 Entry.IsZExt = !Entry.IsSExt;
2511 Args.push_back(Entry);
2512 }
2513
2514 SDValue Callee =
2515 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2516
2517 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2518 TargetLowering::CallLoweringInfo CLI(DAG);
2519 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2520 CLI.setDebugLoc(DL)
2521 .setChain(Chain)
2522 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2523 .setNoReturn(DoesNotReturn)
2524 .setDiscardResult(!IsReturnValueUsed)
2525 .setSExtResult(SignExtend)
2526 .setZExtResult(!SignExtend);
2527 return LowerCallTo(CLI);
2528}
2529
2530 bool SystemZTargetLowering::CanLowerReturn(
2531 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2532 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2533 const Type *RetTy) const {
2534 // Special case that we cannot easily detect in RetCC_SystemZ since
2535 // i128 may not be a legal type.
2536 for (auto &Out : Outs)
2537 if (Out.ArgVT == MVT::i128)
2538 return false;
2539
2540 SmallVector<CCValAssign, 16> RetLocs;
2541 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2542 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2543}
2544
2545SDValue
2546 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2547 bool IsVarArg,
2548 const SmallVectorImpl<ISD::OutputArg> &Outs,
2549 const SmallVectorImpl<SDValue> &OutVals,
2550 const SDLoc &DL, SelectionDAG &DAG) const {
2551 MachineFunction &MF = DAG.getMachineFunction();
2552
2553 // Integer args <=32 bits should have an extension attribute.
2554 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2555
2556 // Assign locations to each returned value.
2557 SmallVector<CCValAssign, 16> RetLocs;
2558 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2559 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2560
2561 // Quick exit for void returns
2562 if (RetLocs.empty())
2563 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2564
2565 if (CallConv == CallingConv::GHC)
2566 report_fatal_error("GHC functions return void only");
2567
2568 // Copy the result values into the output registers.
2569 SDValue Glue;
2570 SmallVector<SDValue, 4> RetOps;
2571 RetOps.push_back(Chain);
2572 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2573 CCValAssign &VA = RetLocs[I];
2574 SDValue RetValue = OutVals[I];
2575
2576 // Make the return register live on exit.
2577 assert(VA.isRegLoc() && "Can only return in registers!");
2578
2579 // Promote the value as required.
2580 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2581
2582 // Chain and glue the copies together.
2583 Register Reg = VA.getLocReg();
2584 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2585 Glue = Chain.getValue(1);
2586 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2587 }
2588
2589 // Update chain and glue.
2590 RetOps[0] = Chain;
2591 if (Glue.getNode())
2592 RetOps.push_back(Glue);
2593
2594 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2595}
2596
2597// Return true if Op is an intrinsic node with chain that returns the CC value
2598// as its only (other) argument. Provide the associated SystemZISD opcode and
2599// the mask of valid CC values if so.
2600static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2601 unsigned &CCValid) {
2602 unsigned Id = Op.getConstantOperandVal(1);
2603 switch (Id) {
2604 case Intrinsic::s390_tbegin:
2605 Opcode = SystemZISD::TBEGIN;
2606 CCValid = SystemZ::CCMASK_TBEGIN;
2607 return true;
2608
2609 case Intrinsic::s390_tbegin_nofloat:
2610 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2611 CCValid = SystemZ::CCMASK_TBEGIN;
2612 return true;
2613
2614 case Intrinsic::s390_tend:
2615 Opcode = SystemZISD::TEND;
2616 CCValid = SystemZ::CCMASK_TEND;
2617 return true;
2618
2619 default:
2620 return false;
2621 }
2622}
2623
2624// Return true if Op is an intrinsic node without chain that returns the
2625// CC value as its final argument. Provide the associated SystemZISD
2626// opcode and the mask of valid CC values if so.
2627static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2628 unsigned Id = Op.getConstantOperandVal(0);
2629 switch (Id) {
2630 case Intrinsic::s390_vpkshs:
2631 case Intrinsic::s390_vpksfs:
2632 case Intrinsic::s390_vpksgs:
2633 Opcode = SystemZISD::PACKS_CC;
2634 CCValid = SystemZ::CCMASK_VCMP;
2635 return true;
2636
2637 case Intrinsic::s390_vpklshs:
2638 case Intrinsic::s390_vpklsfs:
2639 case Intrinsic::s390_vpklsgs:
2640 Opcode = SystemZISD::PACKLS_CC;
2641 CCValid = SystemZ::CCMASK_VCMP;
2642 return true;
2643
2644 case Intrinsic::s390_vceqbs:
2645 case Intrinsic::s390_vceqhs:
2646 case Intrinsic::s390_vceqfs:
2647 case Intrinsic::s390_vceqgs:
2648 case Intrinsic::s390_vceqqs:
2649 Opcode = SystemZISD::VICMPES;
2650 CCValid = SystemZ::CCMASK_VCMP;
2651 return true;
2652
2653 case Intrinsic::s390_vchbs:
2654 case Intrinsic::s390_vchhs:
2655 case Intrinsic::s390_vchfs:
2656 case Intrinsic::s390_vchgs:
2657 case Intrinsic::s390_vchqs:
2658 Opcode = SystemZISD::VICMPHS;
2659 CCValid = SystemZ::CCMASK_VCMP;
2660 return true;
2661
2662 case Intrinsic::s390_vchlbs:
2663 case Intrinsic::s390_vchlhs:
2664 case Intrinsic::s390_vchlfs:
2665 case Intrinsic::s390_vchlgs:
2666 case Intrinsic::s390_vchlqs:
2667 Opcode = SystemZISD::VICMPHLS;
2668 CCValid = SystemZ::CCMASK_VCMP;
2669 return true;
2670
2671 case Intrinsic::s390_vtm:
2672 Opcode = SystemZISD::VTM;
2673 CCValid = SystemZ::CCMASK_VCMP;
2674 return true;
2675
2676 case Intrinsic::s390_vfaebs:
2677 case Intrinsic::s390_vfaehs:
2678 case Intrinsic::s390_vfaefs:
2679 Opcode = SystemZISD::VFAE_CC;
2680 CCValid = SystemZ::CCMASK_ANY;
2681 return true;
2682
2683 case Intrinsic::s390_vfaezbs:
2684 case Intrinsic::s390_vfaezhs:
2685 case Intrinsic::s390_vfaezfs:
2686 Opcode = SystemZISD::VFAEZ_CC;
2687 CCValid = SystemZ::CCMASK_ANY;
2688 return true;
2689
2690 case Intrinsic::s390_vfeebs:
2691 case Intrinsic::s390_vfeehs:
2692 case Intrinsic::s390_vfeefs:
2693 Opcode = SystemZISD::VFEE_CC;
2694 CCValid = SystemZ::CCMASK_ANY;
2695 return true;
2696
2697 case Intrinsic::s390_vfeezbs:
2698 case Intrinsic::s390_vfeezhs:
2699 case Intrinsic::s390_vfeezfs:
2700 Opcode = SystemZISD::VFEEZ_CC;
2701 CCValid = SystemZ::CCMASK_ANY;
2702 return true;
2703
2704 case Intrinsic::s390_vfenebs:
2705 case Intrinsic::s390_vfenehs:
2706 case Intrinsic::s390_vfenefs:
2707 Opcode = SystemZISD::VFENE_CC;
2708 CCValid = SystemZ::CCMASK_ANY;
2709 return true;
2710
2711 case Intrinsic::s390_vfenezbs:
2712 case Intrinsic::s390_vfenezhs:
2713 case Intrinsic::s390_vfenezfs:
2714 Opcode = SystemZISD::VFENEZ_CC;
2715 CCValid = SystemZ::CCMASK_ANY;
2716 return true;
2717
2718 case Intrinsic::s390_vistrbs:
2719 case Intrinsic::s390_vistrhs:
2720 case Intrinsic::s390_vistrfs:
2721 Opcode = SystemZISD::VISTR_CC;
2722 CCValid = SystemZ::CCMASK_ANY;
2723 return true;
2724
2725 case Intrinsic::s390_vstrcbs:
2726 case Intrinsic::s390_vstrchs:
2727 case Intrinsic::s390_vstrcfs:
2728 Opcode = SystemZISD::VSTRC_CC;
2729 CCValid = SystemZ::CCMASK_ANY;
2730 return true;
2731
2732 case Intrinsic::s390_vstrczbs:
2733 case Intrinsic::s390_vstrczhs:
2734 case Intrinsic::s390_vstrczfs:
2735 Opcode = SystemZISD::VSTRCZ_CC;
2736 CCValid = SystemZ::CCMASK_ANY;
2737 return true;
2738
2739 case Intrinsic::s390_vstrsb:
2740 case Intrinsic::s390_vstrsh:
2741 case Intrinsic::s390_vstrsf:
2742 Opcode = SystemZISD::VSTRS_CC;
2743 CCValid = SystemZ::CCMASK_ANY;
2744 return true;
2745
2746 case Intrinsic::s390_vstrszb:
2747 case Intrinsic::s390_vstrszh:
2748 case Intrinsic::s390_vstrszf:
2749 Opcode = SystemZISD::VSTRSZ_CC;
2750 CCValid = SystemZ::CCMASK_ANY;
2751 return true;
2752
2753 case Intrinsic::s390_vfcedbs:
2754 case Intrinsic::s390_vfcesbs:
2755 Opcode = SystemZISD::VFCMPES;
2756 CCValid = SystemZ::CCMASK_VCMP;
2757 return true;
2758
2759 case Intrinsic::s390_vfchdbs:
2760 case Intrinsic::s390_vfchsbs:
2761 Opcode = SystemZISD::VFCMPHS;
2762 CCValid = SystemZ::CCMASK_VCMP;
2763 return true;
2764
2765 case Intrinsic::s390_vfchedbs:
2766 case Intrinsic::s390_vfchesbs:
2767 Opcode = SystemZISD::VFCMPHES;
2768 CCValid = SystemZ::CCMASK_VCMP;
2769 return true;
2770
2771 case Intrinsic::s390_vftcidb:
2772 case Intrinsic::s390_vftcisb:
2773 Opcode = SystemZISD::VFTCI;
2774 CCValid = SystemZ::CCMASK_VCMP;
2775 return true;
2776
2777 case Intrinsic::s390_tdc:
2778 Opcode = SystemZISD::TDC;
2779 CCValid = SystemZ::CCMASK_TDC;
2780 return true;
2781
2782 default:
2783 return false;
2784 }
2785}
2786
2787// Emit an intrinsic with chain and an explicit CC register result.
2788 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2789 unsigned Opcode) {
2790 // Copy all operands except the intrinsic ID.
2791 unsigned NumOps = Op.getNumOperands();
2793 Ops.reserve(NumOps - 1);
2794 Ops.push_back(Op.getOperand(0));
2795 for (unsigned I = 2; I < NumOps; ++I)
2796 Ops.push_back(Op.getOperand(I));
2797
2798 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2799 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2800 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2801 SDValue OldChain = SDValue(Op.getNode(), 1);
2802 SDValue NewChain = SDValue(Intr.getNode(), 1);
2803 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2804 return Intr.getNode();
2805}
2806
2807// Emit an intrinsic with an explicit CC register result.
2808 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2809 unsigned Opcode) {
2810 // Copy all operands except the intrinsic ID.
2811 SDLoc DL(Op);
2812 unsigned NumOps = Op.getNumOperands();
2814 Ops.reserve(NumOps - 1);
2815 for (unsigned I = 1; I < NumOps; ++I) {
2816 SDValue CurrOper = Op.getOperand(I);
2817 if (CurrOper.getValueType() == MVT::f16) {
2818 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2819 "Unhandled intrinsic with f16 operand.");
2820 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2821 }
2822 Ops.push_back(CurrOper);
2823 }
2824
2825 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2826 return Intr.getNode();
2827}
2828
2829// CC is a comparison that will be implemented using an integer or
2830// floating-point comparison. Return the condition code mask for
2831// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2832// unsigned comparisons and clear for signed ones. In the floating-point
2833// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2834 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2835#define CONV(X) \
2836 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2837 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2838 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2839
2840 switch (CC) {
2841 default:
2842 llvm_unreachable("Invalid integer condition!");
2843
2844 CONV(EQ);
2845 CONV(NE);
2846 CONV(GT);
2847 CONV(GE);
2848 CONV(LT);
2849 CONV(LE);
2850
2851 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2852 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2853 }
2854#undef CONV
2855}
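// Worked example (informal expansion of the CONV macro above, for GT):
//   ISD::SETGT  -> SystemZ::CCMASK_CMP_GT
//   ISD::SETOGT -> SystemZ::CCMASK_CMP_GT
//   ISD::SETUGT -> SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_GT
// i.e. the SETU* forms additionally accept CCMASK_CMP_UO, which means
// "unsigned" for integer comparisons and "unordered" for floating point.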
2856
2857// If C can be converted to a comparison against zero, adjust the operands
2858// as necessary.
2859static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2860 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2861 return;
2862
2863 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2864 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2865 return;
2866
2867 int64_t Value = ConstOp1->getSExtValue();
2868 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2869 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2870 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2871 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2872 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2873 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2874 }
2875}
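// Worked example (informal): a signed test "X > -1" arrives here with
// Value == -1 and C.CCMask == SystemZ::CCMASK_CMP_GT. Toggling the
// CCMASK_CMP_EQ bit turns the mask into CCMASK_CMP_GE and Op1 into 0, so the
// test is rewritten as "X >= 0" and can use a compare against zero.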
2876
2877// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2878// adjust the operands as necessary.
2879static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2880 Comparison &C) {
2881 // For us to make any changes, it must be a comparison between a single-use
2882 // load and a constant.
2883 if (!C.Op0.hasOneUse() ||
2884 C.Op0.getOpcode() != ISD::LOAD ||
2885 C.Op1.getOpcode() != ISD::Constant)
2886 return;
2887
2888 // We must have an 8- or 16-bit load.
2889 auto *Load = cast<LoadSDNode>(C.Op0);
2890 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2891 if ((NumBits != 8 && NumBits != 16) ||
2892 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2893 return;
2894
2895 // The load must be an extending one and the constant must be within the
2896 // range of the unextended value.
2897 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2898 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2899 return;
2900 uint64_t Value = ConstOp1->getZExtValue();
2901 uint64_t Mask = (1 << NumBits) - 1;
2902 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2903 // Make sure that ConstOp1 is in range of C.Op0.
2904 int64_t SignedValue = ConstOp1->getSExtValue();
2905 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2906 return;
2907 if (C.ICmpType != SystemZICMP::SignedOnly) {
2908 // Unsigned comparison between two sign-extended values is equivalent
2909 // to unsigned comparison between two zero-extended values.
2910 Value &= Mask;
2911 } else if (NumBits == 8) {
2912 // Try to treat the comparison as unsigned, so that we can use CLI.
2913 // Adjust CCMask and Value as necessary.
2914 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2915 // Test whether the high bit of the byte is set.
2916 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2917 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2918 // Test whether the high bit of the byte is clear.
2919 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2920 else
2921 // No instruction exists for this combination.
2922 return;
2923 C.ICmpType = SystemZICMP::UnsignedOnly;
2924 }
2925 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2926 if (Value > Mask)
2927 return;
2928 // If the constant is in range, we can use any comparison.
2929 C.ICmpType = SystemZICMP::Any;
2930 } else
2931 return;
2932
2933 // Make sure that the first operand is an i32 of the right extension type.
2934 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2935 ISD::SEXTLOAD :
2936 ISD::ZEXTLOAD);
2937 if (C.Op0.getValueType() != MVT::i32 ||
2938 Load->getExtensionType() != ExtType) {
2939 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2940 Load->getBasePtr(), Load->getPointerInfo(),
2941 Load->getMemoryVT(), Load->getAlign(),
2942 Load->getMemOperand()->getFlags());
2943 // Update the chain uses.
2944 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2945 }
2946
2947 // Make sure that the second operand is an i32 with the right value.
2948 if (C.Op1.getValueType() != MVT::i32 ||
2949 Value != ConstOp1->getZExtValue())
2950 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2951}
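// Worked example (informal): an i8 sign-extending load compared with "< 0"
// is rewritten above as the unsigned test "> 127" (Value = 127,
// CCMASK_CMP_GT), which checks exactly the sign bit of the byte and can
// therefore be implemented with CLI.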
2952
2953// Return true if Op is either an unextended load, or a load suitable
2954// for integer register-memory comparisons of type ICmpType.
2955static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2956 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2957 if (Load) {
2958 // There are no instructions to compare a register with a memory byte.
2959 if (Load->getMemoryVT() == MVT::i8)
2960 return false;
2961 // Otherwise decide on extension type.
2962 switch (Load->getExtensionType()) {
2963 case ISD::NON_EXTLOAD:
2964 return true;
2965 case ISD::SEXTLOAD:
2966 return ICmpType != SystemZICMP::UnsignedOnly;
2967 case ISD::ZEXTLOAD:
2968 return ICmpType != SystemZICMP::SignedOnly;
2969 default:
2970 break;
2971 }
2972 }
2973 return false;
2974}
2975
2976// Return true if it is better to swap the operands of C.
2977static bool shouldSwapCmpOperands(const Comparison &C) {
2978 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2979 if (C.Op0.getValueType() == MVT::i128)
2980 return false;
2981 if (C.Op0.getValueType() == MVT::f128)
2982 return false;
2983
2984 // Always keep a floating-point constant second, since comparisons with
2985 // zero can use LOAD TEST and comparisons with other constants make a
2986 // natural memory operand.
2987 if (isa<ConstantFPSDNode>(C.Op1))
2988 return false;
2989
2990 // Never swap comparisons with zero since there are many ways to optimize
2991 // those later.
2992 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2993 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2994 return false;
2995
2996 // Also keep natural memory operands second if the loaded value is
2997 // only used here. Several comparisons have memory forms.
2998 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2999 return false;
3000
3001 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3002 // In that case we generally prefer the memory to be second.
3003 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3004 // The only exceptions are when the second operand is a constant and
3005 // we can use things like CHHSI.
3006 if (!ConstOp1)
3007 return true;
3008 // The unsigned memory-immediate instructions can handle 16-bit
3009 // unsigned integers.
3010 if (C.ICmpType != SystemZICMP::SignedOnly &&
3011 isUInt<16>(ConstOp1->getZExtValue()))
3012 return false;
3013 // The signed memory-immediate instructions can handle 16-bit
3014 // signed integers.
3015 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3016 isInt<16>(ConstOp1->getSExtValue()))
3017 return false;
3018 return true;
3019 }
3020
3021 // Try to promote the use of CGFR and CLGFR.
3022 unsigned Opcode0 = C.Op0.getOpcode();
3023 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3024 return true;
3025 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3026 return true;
3027 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3028 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3029 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3030 return true;
3031
3032 return false;
3033}
3034
3035// Check whether C tests for equality between X and Y and whether X - Y
3036// or Y - X is also computed. In that case it's better to compare the
3037// result of the subtraction against zero.
3038 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
3039 Comparison &C) {
3040 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3041 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3042 for (SDNode *N : C.Op0->users()) {
3043 if (N->getOpcode() == ISD::SUB &&
3044 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3045 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3046 // Disable the nsw and nuw flags: the backend needs to handle
3047 // overflow as well during comparison elimination.
3048 N->dropFlags(SDNodeFlags::NoWrap);
3049 C.Op0 = SDValue(N, 0);
3050 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3051 return;
3052 }
3053 }
3054 }
3055}
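// Worked example (informal): if the function computes both "a - b" and
// "a == b", the equality test is rewritten above as a comparison of the
// subtraction result against zero, so a single SUB can both produce the
// difference and set CC for the branch.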
3056
3057// Check whether C compares a floating-point value with zero and if that
3058// floating-point value is also negated. In this case we can use the
3059// negation to set CC, so avoiding separate LOAD AND TEST and
3060// LOAD (NEGATIVE/COMPLEMENT) instructions.
3061static void adjustForFNeg(Comparison &C) {
3062 // This optimization is invalid for strict comparisons, since FNEG
3063 // does not raise any exceptions.
3064 if (C.Chain)
3065 return;
3066 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3067 if (C1 && C1->isZero()) {
3068 for (SDNode *N : C.Op0->users()) {
3069 if (N->getOpcode() == ISD::FNEG) {
3070 C.Op0 = SDValue(N, 0);
3071 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3072 return;
3073 }
3074 }
3075 }
3076}
3077
3078// Check whether C compares (shl X, 32) with 0 and whether X is
3079// also sign-extended. In that case it is better to test the result
3080// of the sign extension using LTGFR.
3081//
3082// This case is important because InstCombine transforms a comparison
3083// with (sext (trunc X)) into a comparison with (shl X, 32).
3084static void adjustForLTGFR(Comparison &C) {
3085 // Check for a comparison between (shl X, 32) and 0.
3086 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3087 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3088 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3089 if (C1 && C1->getZExtValue() == 32) {
3090 SDValue ShlOp0 = C.Op0.getOperand(0);
3091 // See whether X has any SIGN_EXTEND_INREG uses.
3092 for (SDNode *N : ShlOp0->users()) {
3093 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3094 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3095 C.Op0 = SDValue(N, 0);
3096 return;
3097 }
3098 }
3099 }
3100 }
3101}
3102
3103// If C compares the truncation of an extending load, try to compare
3104// the untruncated value instead. This exposes more opportunities to
3105// reuse CC.
3106static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3107 Comparison &C) {
3108 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3109 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3110 C.Op1.getOpcode() == ISD::Constant &&
3111 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3112 C.Op1->getAsZExtVal() == 0) {
3113 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3114 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3115 C.Op0.getValueSizeInBits().getFixedValue()) {
3116 unsigned Type = L->getExtensionType();
3117 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3118 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3119 C.Op0 = C.Op0.getOperand(0);
3120 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3121 }
3122 }
3123 }
3124}
3125
3126// Return true if shift operation N has an in-range constant shift value.
3127// Store it in ShiftVal if so.
3128static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3129 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3130 if (!Shift)
3131 return false;
3132
3133 uint64_t Amount = Shift->getZExtValue();
3134 if (Amount >= N.getValueSizeInBits())
3135 return false;
3136
3137 ShiftVal = Amount;
3138 return true;
3139}
3140
3141// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3142// instruction and whether the CC value is descriptive enough to handle
3143// a comparison of type Opcode between the AND result and CmpVal.
3144// CCMask says which comparison result is being tested and BitSize is
3145// the number of bits in the operands. If TEST UNDER MASK can be used,
3146// return the corresponding CC mask, otherwise return 0.
3147static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3148 uint64_t Mask, uint64_t CmpVal,
3149 unsigned ICmpType) {
3150 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3151
3152 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3153 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3154 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3155 return 0;
3156
3157 // Work out the masks for the lowest and highest bits.
3159 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3160
3161 // Signed ordered comparisons are effectively unsigned if the sign
3162 // bit is dropped.
3163 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3164
3165 // Check for equality comparisons with 0, or the equivalent.
3166 if (CmpVal == 0) {
3167 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3169 if (CCMask == SystemZ::CCMASK_CMP_NE)
3171 }
3172 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3173 if (CCMask == SystemZ::CCMASK_CMP_LT)
3175 if (CCMask == SystemZ::CCMASK_CMP_GE)
3177 }
3178 if (EffectivelyUnsigned && CmpVal < Low) {
3179 if (CCMask == SystemZ::CCMASK_CMP_LE)
3181 if (CCMask == SystemZ::CCMASK_CMP_GT)
3183 }
3184
3185 // Check for equality comparisons with the mask, or the equivalent.
3186 if (CmpVal == Mask) {
3187 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3189 if (CCMask == SystemZ::CCMASK_CMP_NE)
3191 }
3192 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3193 if (CCMask == SystemZ::CCMASK_CMP_GT)
3195 if (CCMask == SystemZ::CCMASK_CMP_LE)
3197 }
3198 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3199 if (CCMask == SystemZ::CCMASK_CMP_GE)
3201 if (CCMask == SystemZ::CCMASK_CMP_LT)
3203 }
3204
3205 // Check for ordered comparisons with the top bit.
3206 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3207 if (CCMask == SystemZ::CCMASK_CMP_LE)
3209 if (CCMask == SystemZ::CCMASK_CMP_GT)
3211 }
3212 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3213 if (CCMask == SystemZ::CCMASK_CMP_LT)
3215 if (CCMask == SystemZ::CCMASK_CMP_GE)
3217 }
3218
3219 // If there are just two bits, we can do equality checks for Low and High
3220 // as well.
3221 if (Mask == Low + High) {
3222 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3224 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3226 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3228 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3230 }
3231
3232 // Looks like we've exhausted our options.
3233 return 0;
3234}
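// Worked example (informal): with Mask == 0x6 the lowest and highest set bits
// are Low == 0x2 and High == 0x4, so in addition to the usual tests against 0
// and against the mask itself, equality tests against 0x2 or 0x4 can also be
// expressed as TEST UNDER MASK conditions (the "just two bits" case above).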
3235
3236// See whether C can be implemented as a TEST UNDER MASK instruction.
3237// Update the arguments with the TM version if so.
3238 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3239 Comparison &C) {
3240 // Use VECTOR TEST UNDER MASK for i128 operations.
3241 if (C.Op0.getValueType() == MVT::i128) {
3242 // We can use VTM for EQ/NE comparisons of x & y against 0.
3243 if (C.Op0.getOpcode() == ISD::AND &&
3244 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3245 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3246 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3247 if (Mask && Mask->getAPIntValue() == 0) {
3248 C.Opcode = SystemZISD::VTM;
3249 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3250 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3251 C.CCValid = SystemZ::CCMASK_VCMP;
3252 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3253 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3254 else
3255 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3256 }
3257 }
3258 return;
3259 }
3260
3261 // Check that we have a comparison with a constant.
3262 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3263 if (!ConstOp1)
3264 return;
3265 uint64_t CmpVal = ConstOp1->getZExtValue();
3266
3267 // Check whether the nonconstant input is an AND with a constant mask.
3268 Comparison NewC(C);
3269 uint64_t MaskVal;
3270 ConstantSDNode *Mask = nullptr;
3271 if (C.Op0.getOpcode() == ISD::AND) {
3272 NewC.Op0 = C.Op0.getOperand(0);
3273 NewC.Op1 = C.Op0.getOperand(1);
3274 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3275 if (!Mask)
3276 return;
3277 MaskVal = Mask->getZExtValue();
3278 } else {
3279 // There is no instruction to compare with a 64-bit immediate
3280 // so use TMHH instead if possible. We need an unsigned ordered
3281 // comparison with an i64 immediate.
3282 if (NewC.Op0.getValueType() != MVT::i64 ||
3283 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3284 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3285 NewC.ICmpType == SystemZICMP::SignedOnly)
3286 return;
3287 // Convert LE and GT comparisons into LT and GE.
3288 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3289 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3290 if (CmpVal == uint64_t(-1))
3291 return;
3292 CmpVal += 1;
3293 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3294 }
3295 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3296 // be masked off without changing the result.
3297 MaskVal = -(CmpVal & -CmpVal);
3298 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3299 }
3300 if (!MaskVal)
3301 return;
3302
3303 // Check whether the combination of mask, comparison value and comparison
3304 // type are suitable.
3305 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3306 unsigned NewCCMask, ShiftVal;
3307 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3308 NewC.Op0.getOpcode() == ISD::SHL &&
3309 isSimpleShift(NewC.Op0, ShiftVal) &&
3310 (MaskVal >> ShiftVal != 0) &&
3311 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3312 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3313 MaskVal >> ShiftVal,
3314 CmpVal >> ShiftVal,
3315 SystemZICMP::Any))) {
3316 NewC.Op0 = NewC.Op0.getOperand(0);
3317 MaskVal >>= ShiftVal;
3318 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3319 NewC.Op0.getOpcode() == ISD::SRL &&
3320 isSimpleShift(NewC.Op0, ShiftVal) &&
3321 (MaskVal << ShiftVal != 0) &&
3322 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3323 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3324 MaskVal << ShiftVal,
3325 CmpVal << ShiftVal,
3327 NewC.Op0 = NewC.Op0.getOperand(0);
3328 MaskVal <<= ShiftVal;
3329 } else {
3330 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3331 NewC.ICmpType);
3332 if (!NewCCMask)
3333 return;
3334 }
3335
3336 // Go ahead and make the change.
3337 C.Opcode = SystemZISD::TM;
3338 C.Op0 = NewC.Op0;
3339 if (Mask && Mask->getZExtValue() == MaskVal)
3340 C.Op1 = SDValue(Mask, 0);
3341 else
3342 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3343 C.CCValid = SystemZ::CCMASK_TM;
3344 C.CCMask = NewCCMask;
3345}
3346
3347// Implement i128 comparison in vector registers.
3348static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3349 Comparison &C) {
3350 if (C.Opcode != SystemZISD::ICMP)
3351 return;
3352 if (C.Op0.getValueType() != MVT::i128)
3353 return;
3354
3355 // Recognize vector comparison reductions.
3356 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3357 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3358 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3359 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3360 bool CmpNull = isNullConstant(C.Op1);
3361 SDValue Src = peekThroughBitcasts(C.Op0);
3362 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3363 Src = Src.getOperand(0);
3364 CmpNull = !CmpNull;
3365 }
3366 unsigned Opcode = 0;
3367 if (Src.hasOneUse()) {
3368 switch (Src.getOpcode()) {
3369 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3370 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3371 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3372 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3373 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3374 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3375 default: break;
3376 }
3377 }
3378 if (Opcode) {
3379 C.Opcode = Opcode;
3380 C.Op0 = Src->getOperand(0);
3381 C.Op1 = Src->getOperand(1);
3382 C.CCValid = SystemZ::CCMASK_VCMP;
3384 if (!CmpEq)
3385 C.CCMask ^= C.CCValid;
3386 return;
3387 }
3388 }
3389
3390 // Everything below here is not useful if we have native i128 compares.
3391 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3392 return;
3393
3394 // (In-)Equality comparisons can be implemented via VCEQGS.
3395 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3396 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3397 C.Opcode = SystemZISD::VICMPES;
3398 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3399 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3400 C.CCValid = SystemZ::CCMASK_VCMP;
3401 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3402 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3403 else
3404 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3405 return;
3406 }
3407
3408 // Normalize other comparisons to GT.
3409 bool Swap = false, Invert = false;
3410 switch (C.CCMask) {
3411 case SystemZ::CCMASK_CMP_GT: break;
3412 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3413 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3414 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3415 default: llvm_unreachable("Invalid integer condition!");
3416 }
3417 if (Swap)
3418 std::swap(C.Op0, C.Op1);
3419
3420 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3421 C.Opcode = SystemZISD::UCMP128HI;
3422 else
3423 C.Opcode = SystemZISD::SCMP128HI;
3424 C.CCValid = SystemZ::CCMASK_ANY;
3425 C.CCMask = SystemZ::CCMASK_1;
3426
3427 if (Invert)
3428 C.CCMask ^= C.CCValid;
3429}
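// Worked example (informal): without native i128 compares, "a < b" is first
// normalized above by swapping the operands to "b > a", and "a <= b" becomes
// the inverted form of "a > b", before the comparison is lowered to
// SCMP128HI/UCMP128HI with CCMASK_1 as the branch condition.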
3430
3431// See whether the comparison argument contains a redundant AND
3432// and remove it if so. This sometimes happens due to the generic
3433// BRCOND expansion.
3434 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3435 Comparison &C) {
3436 if (C.Op0.getOpcode() != ISD::AND)
3437 return;
3438 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3439 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3440 return;
3441 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3442 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3443 return;
3444
3445 C.Op0 = C.Op0.getOperand(0);
3446}
3447
3448// Return a Comparison that tests the condition-code result of intrinsic
3449// node Call against constant integer CC using comparison code Cond.
3450// Opcode is the opcode of the SystemZISD operation for the intrinsic
3451// and CCValid is the set of possible condition-code results.
3452static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3453 SDValue Call, unsigned CCValid, uint64_t CC,
3454 ISD::CondCode Cond) {
3455 Comparison C(Call, SDValue(), SDValue());
3456 C.Opcode = Opcode;
3457 C.CCValid = CCValid;
3458 if (Cond == ISD::SETEQ)
3459 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3460 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3461 else if (Cond == ISD::SETNE)
3462 // ...and the inverse of that.
3463 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3464 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3465 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3466 // always true for CC>3.
3467 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3468 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3469 // ...and the inverse of that.
3470 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3471 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3472 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3473 // always true for CC>3.
3474 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3475 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3476 // ...and the inverse of that.
3477 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3478 else
3479 llvm_unreachable("Unexpected integer comparison type");
3480 C.CCMask &= CCValid;
3481 return C;
3482}
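// Worked example (informal): when an intrinsic's CC result is compared with
// the constant 1, the masks computed above are:
//   SETEQ -> 1 << (3 - 1) = 0x4, i.e. only CC == 1 is accepted;
//   SETLT -> (~0U << (4 - 1)) & CCValid = 0x8, i.e. only CC == 0 is accepted,
// matching the convention that bit 3 stands for CC == 0 and bit 0 for CC == 3.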
3483
3484 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3485static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3486 ISD::CondCode Cond, const SDLoc &DL,
3487 SDValue Chain = SDValue(),
3488 bool IsSignaling = false) {
3489 if (CmpOp1.getOpcode() == ISD::Constant) {
3490 assert(!Chain);
3491 unsigned Opcode, CCValid;
3492 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3493 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3494 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3495 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3496 CmpOp1->getAsZExtVal(), Cond);
3497 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3498 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3499 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3500 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3501 CmpOp1->getAsZExtVal(), Cond);
3502 }
3503 Comparison C(CmpOp0, CmpOp1, Chain);
3504 C.CCMask = CCMaskForCondCode(Cond);
3505 if (C.Op0.getValueType().isFloatingPoint()) {
3506 C.CCValid = SystemZ::CCMASK_FCMP;
3507 if (!C.Chain)
3508 C.Opcode = SystemZISD::FCMP;
3509 else if (!IsSignaling)
3510 C.Opcode = SystemZISD::STRICT_FCMP;
3511 else
3512 C.Opcode = SystemZISD::STRICT_FCMPS;
3514 } else {
3515 assert(!C.Chain);
3516 C.CCValid = SystemZ::CCMASK_ICMP;
3517 C.Opcode = SystemZISD::ICMP;
3518 // Choose the type of comparison. Equality and inequality tests can
3519 // use either signed or unsigned comparisons. The choice also doesn't
3520 // matter if both sign bits are known to be clear. In those cases we
3521 // want to give the main isel code the freedom to choose whichever
3522 // form fits best.
3523 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3524 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3525 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3526 C.ICmpType = SystemZICMP::Any;
3527 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3528 C.ICmpType = SystemZICMP::UnsignedOnly;
3529 else
3530 C.ICmpType = SystemZICMP::SignedOnly;
3531 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3532 adjustForRedundantAnd(DAG, DL, C);
3533 adjustZeroCmp(DAG, DL, C);
3534 adjustSubwordCmp(DAG, DL, C);
3535 adjustForSubtraction(DAG, DL, C);
3536 adjustForLTGFR(C);
3537 adjustICmpTruncate(DAG, DL, C);
3538 }
3539
3540 if (shouldSwapCmpOperands(C)) {
3541 std::swap(C.Op0, C.Op1);
3542 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3543 }
3544
3545 adjustForTestUnderMask(DAG, DL, C);
3546 adjustICmp128(DAG, DL, C);
3547 return C;
3548}
3549
3550// Emit the comparison instruction described by C.
3551static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3552 if (!C.Op1.getNode()) {
3553 SDNode *Node;
3554 switch (C.Op0.getOpcode()) {
3555 case ISD::INTRINSIC_W_CHAIN:
3556 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3557 return SDValue(Node, 0);
3558 case ISD::INTRINSIC_WO_CHAIN:
3559 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3560 return SDValue(Node, Node->getNumValues() - 1);
3561 default:
3562 llvm_unreachable("Invalid comparison operands");
3563 }
3564 }
3565 if (C.Opcode == SystemZISD::ICMP)
3566 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3567 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3568 if (C.Opcode == SystemZISD::TM) {
3569 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3570 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3571 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3572 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3573 }
3574 if (C.Opcode == SystemZISD::VICMPES ||
3575 C.Opcode == SystemZISD::VICMPHS ||
3576 C.Opcode == SystemZISD::VICMPHLS ||
3577 C.Opcode == SystemZISD::VFCMPES ||
3578 C.Opcode == SystemZISD::VFCMPHS ||
3579 C.Opcode == SystemZISD::VFCMPHES) {
3580 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3581 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3582 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3583 return SDValue(Val.getNode(), 1);
3584 }
3585 if (C.Chain) {
3586 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3587 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3588 }
3589 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3590}
3591
3592// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3593// 64 bits. Extend is the extension type to use. Store the high part
3594// in Hi and the low part in Lo.
3595static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3596 SDValue Op0, SDValue Op1, SDValue &Hi,
3597 SDValue &Lo) {
3598 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3599 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3600 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3601 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3602 DAG.getConstant(32, DL, MVT::i64));
3603 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3604 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3605}
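// For illustration: an i32 smul_lohi of -1 and 2 sign-extends both operands,
// multiplies them as i64 (0xFFFFFFFFFFFFFFFE = -2), and then splits the
// product, giving Hi = 0xFFFFFFFF (the top 32 bits) and Lo = 0xFFFFFFFE.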
3606
3607// Lower a binary operation that produces two VT results, one in each
3608// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3609// and Opcode performs the GR128 operation. Store the even register result
3610// in Even and the odd register result in Odd.
3611static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3612 unsigned Opcode, SDValue Op0, SDValue Op1,
3613 SDValue &Even, SDValue &Odd) {
3614 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3615 bool Is32Bit = is32Bit(VT);
3616 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3617 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3618}
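// A minimal usage sketch (mirroring the divide lowerings further down): a
// 64-bit SDIVREM produces one MVT::Untyped GR128 value, and the two halves
// are then pulled out of the register pair, e.g.
//   SDValue Rem, Quot;
//   lowerGR128Binary(DAG, DL, MVT::i64, SystemZISD::SDIVREM, Op0, Op1,
//                    Rem, Quot);  // remainder = even half, quotient = odd half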
3619
3620// Return an i32 value that is 1 if the CC value produced by CCReg is
3621// in the mask CCMask and 0 otherwise. CC is known to have a value
3622// in CCValid, so other values can be ignored.
3623static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3624 unsigned CCValid, unsigned CCMask) {
3625 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3626 DAG.getConstant(0, DL, MVT::i32),
3627 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3628 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3629 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3630}
3631
3632// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3633// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3634// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3635// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3636// floating-point comparisons.
3637static unsigned getVectorComparison(ISD::CondCode CC,
3638 CmpMode Mode) {
3639 switch (CC) {
3640 case ISD::SETOEQ:
3641 case ISD::SETEQ:
3642 switch (Mode) {
3643 case CmpMode::Int: return SystemZISD::VICMPE;
3644 case CmpMode::FP: return SystemZISD::VFCMPE;
3645 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3646 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3647 }
3648 llvm_unreachable("Bad mode");
3649
3650 case ISD::SETOGE:
3651 case ISD::SETGE:
3652 switch (Mode) {
3653 case CmpMode::Int: return 0;
3654 case CmpMode::FP: return SystemZISD::VFCMPHE;
3655 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3656 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3657 }
3658 llvm_unreachable("Bad mode");
3659
3660 case ISD::SETOGT:
3661 case ISD::SETGT:
3662 switch (Mode) {
3663 case CmpMode::Int: return SystemZISD::VICMPH;
3664 case CmpMode::FP: return SystemZISD::VFCMPH;
3665 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3666 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3667 }
3668 llvm_unreachable("Bad mode");
3669
3670 case ISD::SETUGT:
3671 switch (Mode) {
3672 case CmpMode::Int: return SystemZISD::VICMPHL;
3673 case CmpMode::FP: return 0;
3674 case CmpMode::StrictFP: return 0;
3675 case CmpMode::SignalingFP: return 0;
3676 }
3677 llvm_unreachable("Bad mode");
3678
3679 default:
3680 return 0;
3681 }
3682}
3683
3684// Return the SystemZISD vector comparison operation for CC or its inverse,
3685// or 0 if neither can be done directly. Indicate in Invert whether the
3686// result is for the inverse of CC. Mode is as above.
3687static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3688 bool &Invert) {
3689 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3690 Invert = false;
3691 return Opcode;
3692 }
3693
3694 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3695 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3696 Invert = true;
3697 return Opcode;
3698 }
3699
3700 return 0;
3701}
3702
3703// Return a v2f64 that contains the extended form of elements Start and Start+1
3704// of v4f32 value Op. If Chain is nonnull, return the strict form.
3705static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3706 SDValue Op, SDValue Chain) {
3707 int Mask[] = { Start, -1, Start + 1, -1 };
3708 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3709 if (Chain) {
3710 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3711 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3712 }
3713 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3714}
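// Example: with Start == 0 the mask {0, -1, 1, -1} moves elements 0 and 1 of
// Op into lanes 0 and 2 of the shuffled v4f32, which are exactly the lanes
// that SystemZISD::VEXTEND widens into the two f64 elements of the result.
// Start == 2 does the same for elements 2 and 3.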
3715
3716// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3717// producing a result of type VT. If Chain is nonnull, return the strict form.
3718SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3719 const SDLoc &DL, EVT VT,
3720 SDValue CmpOp0,
3721 SDValue CmpOp1,
3722 SDValue Chain) const {
3723 // There is no hardware support for v4f32 (unless we have the vector
3724 // enhancements facility 1), so extend the vector into two v2f64s
3725 // and compare those.
3726 if (CmpOp0.getValueType() == MVT::v4f32 &&
3727 !Subtarget.hasVectorEnhancements1()) {
3728 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3729 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3730 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3731 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3732 if (Chain) {
3733 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3734 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3735 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3736 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3737 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3738 H1.getValue(1), L1.getValue(1),
3739 HRes.getValue(1), LRes.getValue(1) };
3740 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3741 SDValue Ops[2] = { Res, NewChain };
3742 return DAG.getMergeValues(Ops, DL);
3743 }
3744 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3745 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3746 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3747 }
3748 if (Chain) {
3749 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3750 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3751 }
3752 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3753}
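// Sketch of the v4f32 fallback above: a compare of <a0,a1,a2,a3> with
// <b0,b1,b2,b3> is performed as two v2f64 compares, <a0,a1> vs <b0,b1> and
// <a2,a3> vs <b2,b3>, each producing a v2i64 mask; SystemZISD::PACK then
// narrows and concatenates the two masks back into the v4i32 result.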
3754
3755// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3756// an integer mask of type VT. If Chain is nonnull, we have a strict
3757// floating-point comparison. If in addition IsSignaling is true, we have
3758// a strict signaling floating-point comparison.
3759SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3760 const SDLoc &DL, EVT VT,
3761 ISD::CondCode CC,
3762 SDValue CmpOp0,
3763 SDValue CmpOp1,
3764 SDValue Chain,
3765 bool IsSignaling) const {
3766 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3767 assert (!Chain || IsFP);
3768 assert (!IsSignaling || Chain);
3769 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3770 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3771 bool Invert = false;
3772 SDValue Cmp;
3773 switch (CC) {
3774 // Handle tests for order using (or (ogt y x) (oge x y)).
3775 case ISD::SETUO:
3776 Invert = true;
3777 [[fallthrough]];
3778 case ISD::SETO: {
3779 assert(IsFP && "Unexpected integer comparison");
3780 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3781 DL, VT, CmpOp1, CmpOp0, Chain);
3782 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3783 DL, VT, CmpOp0, CmpOp1, Chain);
3784 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3785 if (Chain)
3786 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3787 LT.getValue(1), GE.getValue(1));
3788 break;
3789 }
3790
3791 // Handle <> tests using (or (ogt y x) (ogt x y)).
3792 case ISD::SETUEQ:
3793 Invert = true;
3794 [[fallthrough]];
3795 case ISD::SETONE: {
3796 assert(IsFP && "Unexpected integer comparison");
3797 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3798 DL, VT, CmpOp1, CmpOp0, Chain);
3799 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3800 DL, VT, CmpOp0, CmpOp1, Chain);
3801 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3802 if (Chain)
3803 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3804 LT.getValue(1), GT.getValue(1));
3805 break;
3806 }
3807
3808 // Otherwise a single comparison is enough. It doesn't really
3809 // matter whether we try the inversion or the swap first, since
3810 // there are no cases where both work.
3811 default:
3812 // Optimize sign-bit comparisons to signed compares.
3813 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3814 ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
3815 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3816 APInt Mask;
3817 if (CmpOp0.getOpcode() == ISD::AND
3818 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3819 && Mask == APInt::getSignMask(EltSize)) {
3820 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3821 CmpOp0 = CmpOp0.getOperand(0);
3822 }
3823 }
3824 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3825 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3826 else {
3827 CC = ISD::getSetCCSwappedOperands(CC);
3828 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3829 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3830 else
3831 llvm_unreachable("Unhandled comparison");
3832 }
3833 if (Chain)
3834 Chain = Cmp.getValue(1);
3835 break;
3836 }
3837 if (Invert) {
3838 SDValue Mask =
3839 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3840 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3841 }
3842 if (Chain && Chain.getNode() != Cmp.getNode()) {
3843 SDValue Ops[2] = { Cmp, Chain };
3844 Cmp = DAG.getMergeValues(Ops, DL);
3845 }
3846 return Cmp;
3847}
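// The SETO/SETONE expansions above can be read per lane as (illustrative):
//   ordered(x, y) = (y > x) | (x >= y)   // true iff neither operand is NaN
//   one(x, y)     = (y > x) | (x > y)    // ordered and not equal
// SETUO and SETUEQ reuse the same compares and simply invert the final mask.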
3848
3849SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3850 SelectionDAG &DAG) const {
3851 SDValue CmpOp0 = Op.getOperand(0);
3852 SDValue CmpOp1 = Op.getOperand(1);
3853 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3854 SDLoc DL(Op);
3855 EVT VT = Op.getValueType();
3856 if (VT.isVector())
3857 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3858
3859 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3860 SDValue CCReg = emitCmp(DAG, DL, C);
3861 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3862}
3863
3864SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3865 SelectionDAG &DAG,
3866 bool IsSignaling) const {
3867 SDValue Chain = Op.getOperand(0);
3868 SDValue CmpOp0 = Op.getOperand(1);
3869 SDValue CmpOp1 = Op.getOperand(2);
3870 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3871 SDLoc DL(Op);
3872 EVT VT = Op.getNode()->getValueType(0);
3873 if (VT.isVector()) {
3874 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3875 Chain, IsSignaling);
3876 return Res.getValue(Op.getResNo());
3877 }
3878
3879 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3880 SDValue CCReg = emitCmp(DAG, DL, C);
3881 CCReg->setFlags(Op->getFlags());
3882 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3883 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3884 return DAG.getMergeValues(Ops, DL);
3885}
3886
3887SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3888 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3889 SDValue CmpOp0 = Op.getOperand(2);
3890 SDValue CmpOp1 = Op.getOperand(3);
3891 SDValue Dest = Op.getOperand(4);
3892 SDLoc DL(Op);
3893
3894 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3895 SDValue CCReg = emitCmp(DAG, DL, C);
3896 return DAG.getNode(
3897 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3898 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3899 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3900}
3901
3902// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3903// allowing Pos and Neg to be wider than CmpOp.
3904static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3905 return (Neg.getOpcode() == ISD::SUB &&
3906 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3907 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3908 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3909 Pos.getOperand(0) == CmpOp)));
3910}
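// Illustrative DAG shape that isAbsolute() matches (names are made up):
//   %ext = sign_extend i32 %x to i64
//   %neg = sub i64 0, %ext
//   select (setcc %x, 0, <lt/gt>), %ext, %neg
// Here CmpOp = %x, Pos = %ext and Neg = %neg; lowerSELECT_CC below turns such
// selects into LPGFR/LNGFR-style absolute or negated-absolute operations.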
3911
3912// Return the absolute or negative absolute of Op; IsNegative decides which.
3913static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3914 bool IsNegative) {
3915 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3916 if (IsNegative)
3917 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3918 DAG.getConstant(0, DL, Op.getValueType()), Op);
3919 return Op;
3920}
3921
3922static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3923 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3924 EVT VT = MVT::i128;
3925 unsigned Op;
3926
3927 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3928 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3929 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3930 std::swap(TrueOp, FalseOp);
3931 C.CCMask ^= C.CCValid;
3932 }
3933 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3934 std::swap(C.Op0, C.Op1);
3935 C.CCMask = SystemZ::CCMASK_CMP_GT;
3936 }
3937 switch (C.CCMask) {
3938 case SystemZ::CCMASK_CMP_EQ:
3939 Op = SystemZISD::VICMPE;
3940 break;
3941 case SystemZ::CCMASK_CMP_GT:
3942 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3943 Op = SystemZISD::VICMPHL;
3944 else
3945 Op = SystemZISD::VICMPH;
3946 break;
3947 default:
3948 llvm_unreachable("Unhandled comparison");
3949 break;
3950 }
3951
3952 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3953 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3954 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3955 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3956}
3957
3958SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3959 SelectionDAG &DAG) const {
3960 SDValue CmpOp0 = Op.getOperand(0);
3961 SDValue CmpOp1 = Op.getOperand(1);
3962 SDValue TrueOp = Op.getOperand(2);
3963 SDValue FalseOp = Op.getOperand(3);
3964 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3965 SDLoc DL(Op);
3966
3967 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
3968 // legalizer, as it will be handled according to the type of the resulting
3969 // value. Extend them here if needed.
3970 if (CmpOp0.getSimpleValueType() == MVT::f16) {
3971 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
3972 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
3973 }
3974
3975 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3976
3977 // Check for absolute and negative-absolute selections, including those
3978 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3979 // This check supplements the one in DAGCombiner.
3980 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3981 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3982 C.Op1.getOpcode() == ISD::Constant &&
3983 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3984 C.Op1->getAsZExtVal() == 0) {
3985 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3986 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3987 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3988 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3989 }
3990
3991 if (Subtarget.hasVectorEnhancements3() &&
3992 C.Opcode == SystemZISD::ICMP &&
3993 C.Op0.getValueType() == MVT::i128 &&
3994 TrueOp.getValueType() == MVT::i128) {
3995 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
3996 }
3997
3998 SDValue CCReg = emitCmp(DAG, DL, C);
3999 SDValue Ops[] = {TrueOp, FalseOp,
4000 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4001 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4002
4003 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4004}
4005
4006SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4007 SelectionDAG &DAG) const {
4008 SDLoc DL(Node);
4009 const GlobalValue *GV = Node->getGlobal();
4010 int64_t Offset = Node->getOffset();
4011 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4012 CodeModel::Model CM = DAG.getTarget().getCodeModel();
4013
4014 SDValue Result;
4015 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4016 if (isInt<32>(Offset)) {
4017 // Assign anchors at 1<<12 byte boundaries.
4018 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4019 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4020 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4021
4022 // The offset can be folded into the address if it is aligned to a
4023 // halfword.
4024 Offset -= Anchor;
4025 if (Offset != 0 && (Offset & 1) == 0) {
4026 SDValue Full =
4027 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4028 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
4029 Offset = 0;
4030 }
4031 } else {
4032 // Conservatively load a constant offset greater than 32 bits into a
4033 // register below.
4034 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4035 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4036 }
4037 } else if (Subtarget.isTargetELF()) {
4038 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4039 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4040 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4041 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4042 } else if (Subtarget.isTargetzOS()) {
4043 Result = getADAEntry(DAG, GV, DL, PtrVT);
4044 } else
4045 llvm_unreachable("Unexpected Subtarget");
4046
4047 // If there was a non-zero offset that we didn't fold, create an explicit
4048 // addition for it.
4049 if (Offset != 0)
4050 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4051 DAG.getSignedConstant(Offset, DL, PtrVT));
4052
4053 return Result;
4054}
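// Worked example of the anchoring above: for GV + 0x1004 the anchor is
// GV + 0x1000 (a 4KB boundary) materialized via PCREL_WRAPPER (LARL), and the
// remaining offset of 4 is even (halfword aligned), so it is folded back in
// with PCREL_OFFSET.  A remaining odd offset such as 5 could not be folded and
// would instead be added with the explicit ISD::ADD at the end.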
4055
4056SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4057 SelectionDAG &DAG,
4058 unsigned Opcode,
4059 SDValue GOTOffset) const {
4060 SDLoc DL(Node);
4061 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4062 SDValue Chain = DAG.getEntryNode();
4063 SDValue Glue;
4064
4065 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4066 CallingConv::GHC)
4067 report_fatal_error("In GHC calling convention TLS is not supported");
4068
4069 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4070 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4071 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4072 Glue = Chain.getValue(1);
4073 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4074 Glue = Chain.getValue(1);
4075
4076 // The first call operand is the chain and the second is the TLS symbol.
4077 SmallVector<SDValue, 8> Ops;
4078 Ops.push_back(Chain);
4079 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4080 Node->getValueType(0),
4081 0, 0));
4082
4083 // Add argument registers to the end of the list so that they are
4084 // known live into the call.
4085 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4086 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4087
4088 // Add a register mask operand representing the call-preserved registers.
4089 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4090 const uint32_t *Mask =
4091 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4092 assert(Mask && "Missing call preserved mask for calling convention");
4093 Ops.push_back(DAG.getRegisterMask(Mask));
4094
4095 // Glue the call to the argument copies.
4096 Ops.push_back(Glue);
4097
4098 // Emit the call.
4099 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4100 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4101 Glue = Chain.getValue(1);
4102
4103 // Copy the return value from %r2.
4104 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4105}
4106
4107SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4108 SelectionDAG &DAG) const {
4109 SDValue Chain = DAG.getEntryNode();
4110 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4111
4112 // The high part of the thread pointer is in access register 0.
4113 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4114 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4115
4116 // The low part of the thread pointer is in access register 1.
4117 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4118 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4119
4120 // Merge them into a single 64-bit address.
4121 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4122 DAG.getConstant(32, DL, PtrVT));
4123 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4124}
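// In effect the thread pointer is reassembled as
//   TP = (anyext(%a0) << 32) | zext(%a1)
// where %a0 and %a1 are the 32-bit access registers; any garbage in the upper
// half of the extended %a0 is shifted out before the OR.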
4125
4126SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4127 SelectionDAG &DAG) const {
4128 if (DAG.getTarget().useEmulatedTLS())
4129 return LowerToTLSEmulatedModel(Node, DAG);
4130 SDLoc DL(Node);
4131 const GlobalValue *GV = Node->getGlobal();
4132 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4133 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4134
4135 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4136 CallingConv::GHC)
4137 report_fatal_error("In GHC calling convention TLS is not supported");
4138
4139 SDValue TP = lowerThreadPointer(DL, DAG);
4140
4141 // Get the offset of GA from the thread pointer, based on the TLS model.
4142 SDValue Offset;
4143 switch (model) {
4144 case TLSModel::GeneralDynamic: {
4145 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4146 SystemZConstantPoolValue *CPV =
4148
4149 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4150 Offset = DAG.getLoad(
4151 PtrVT, DL, DAG.getEntryNode(), Offset,
4152 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4153
4154 // Call __tls_get_offset to retrieve the offset.
4155 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4156 break;
4157 }
4158
4159 case TLSModel::LocalDynamic: {
4160 // Load the GOT offset of the module ID.
4161 SystemZConstantPoolValue *CPV =
4163
4164 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4165 Offset = DAG.getLoad(
4166 PtrVT, DL, DAG.getEntryNode(), Offset,
4167 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4168
4169 // Call __tls_get_offset to retrieve the module base offset.
4170 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4171
4172 // Note: The SystemZLDCleanupPass will remove redundant computations
4173 // of the module base offset. Count total number of local-dynamic
4174 // accesses to trigger execution of that pass.
4175 SystemZMachineFunctionInfo* MFI =
4176 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4177 MFI->incNumLocalDynamicTLSAccesses();
4178
4179 // Add the per-symbol offset.
4180 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4181
4182 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4183 DTPOffset = DAG.getLoad(
4184 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4185 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4186
4187 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4188 break;
4189 }
4190
4191 case TLSModel::InitialExec: {
4192 // Load the offset from the GOT.
4193 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4194 SystemZII::MO_INDNTPOFF);
4195 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4196 Offset =
4197 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4198 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4199 break;
4200 }
4201
4202 case TLSModel::LocalExec: {
4203 // Force the offset into the constant pool and load it from there.
4204 SystemZConstantPoolValue *CPV =
4205 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4206
4207 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4208 Offset = DAG.getLoad(
4209 PtrVT, DL, DAG.getEntryNode(), Offset,
4210 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4211 break;
4212 }
4213 }
4214
4215 // Add the base and offset together.
4216 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4217}
4218
4219SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4220 SelectionDAG &DAG) const {
4221 SDLoc DL(Node);
4222 const BlockAddress *BA = Node->getBlockAddress();
4223 int64_t Offset = Node->getOffset();
4224 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4225
4226 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4227 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4228 return Result;
4229}
4230
4231SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4232 SelectionDAG &DAG) const {
4233 SDLoc DL(JT);
4234 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4235 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4236
4237 // Use LARL to load the address of the table.
4238 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4239}
4240
4241SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4242 SelectionDAG &DAG) const {
4243 SDLoc DL(CP);
4244 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4245
4246 SDValue Result;
4247 if (CP->isMachineConstantPoolEntry())
4248 Result =
4249 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4250 else
4251 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4252 CP->getOffset());
4253
4254 // Use LARL to load the address of the constant pool entry.
4255 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4256}
4257
4258SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4259 SelectionDAG &DAG) const {
4260 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4261 MachineFunction &MF = DAG.getMachineFunction();
4262 MachineFrameInfo &MFI = MF.getFrameInfo();
4263 MFI.setFrameAddressIsTaken(true);
4264
4265 SDLoc DL(Op);
4266 unsigned Depth = Op.getConstantOperandVal(0);
4267 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4268
4269 // By definition, the frame address is the address of the back chain. (In
4270 // the case of packed stack without backchain, return the address where the
4271 // backchain would have been stored. This will either be an unused space or
4272 // contain a saved register).
4273 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4274 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4275
4276 if (Depth > 0) {
4277 // FIXME The frontend should detect this case.
4278 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4279 report_fatal_error("Unsupported stack frame traversal count");
4280
4281 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4282 while (Depth--) {
4283 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4284 MachinePointerInfo());
4285 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4286 }
4287 }
4288
4289 return BackChain;
4290}
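// A sketch of the Depth > 0 walk above in pseudo-C (field names illustrative):
//   char *slot = frame_address;              // back chain slot of this frame
//   while (depth--)
//     slot = *(char **)slot + backchain_off; // back chain slot one frame up
//   return slot;
// Each iteration loads the saved back chain pointer and rebases to the
// caller's back chain slot.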
4291
4292SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4293 SelectionDAG &DAG) const {
4294 MachineFunction &MF = DAG.getMachineFunction();
4295 MachineFrameInfo &MFI = MF.getFrameInfo();
4296 MFI.setReturnAddressIsTaken(true);
4297
4298 SDLoc DL(Op);
4299 unsigned Depth = Op.getConstantOperandVal(0);
4300 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4301
4302 if (Depth > 0) {
4303 // FIXME The frontend should detect this case.
4304 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4305 report_fatal_error("Unsupported stack frame traversal count");
4306
4307 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4308 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4309 int Offset = TFL->getReturnAddressOffset(MF);
4310 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4311 DAG.getSignedConstant(Offset, DL, PtrVT));
4312 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4313 MachinePointerInfo());
4314 }
4315
4316 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4317 // implicit live-in.
4318 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4319 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4320 &SystemZ::GR64BitRegClass);
4321 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4322}
4323
4324SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4325 SelectionDAG &DAG) const {
4326 SDLoc DL(Op);
4327 SDValue In = Op.getOperand(0);
4328 EVT InVT = In.getValueType();
4329 EVT ResVT = Op.getValueType();
4330
4331 // Convert loads directly. This is normally done by DAGCombiner,
4332 // but we need this case for bitcasts that are created during lowering
4333 // and which are then lowered themselves.
4334 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4335 if (ISD::isNormalLoad(LoadN)) {
4336 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4337 LoadN->getBasePtr(), LoadN->getMemOperand());
4338 // Update the chain uses.
4339 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4340 return NewLoad;
4341 }
4342
4343 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4344 SDValue In64;
4345 if (Subtarget.hasHighWord()) {
4346 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4347 MVT::i64);
4348 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4349 MVT::i64, SDValue(U64, 0), In);
4350 } else {
4351 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4352 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4353 DAG.getConstant(32, DL, MVT::i64));
4354 }
4355 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4356 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4357 DL, MVT::f32, Out64);
4358 }
4359 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4360 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4361 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4362 MVT::f64, SDValue(U64, 0), In);
4363 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4364 if (Subtarget.hasHighWord())
4365 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4366 MVT::i32, Out64);
4367 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4368 DAG.getConstant(32, DL, MVT::i64));
4369 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4370 }
4371 llvm_unreachable("Unexpected bitcast combination");
4372}
4373
4374SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4375 SelectionDAG &DAG) const {
4376
4377 if (Subtarget.isTargetXPLINK64())
4378 return lowerVASTART_XPLINK(Op, DAG);
4379 else
4380 return lowerVASTART_ELF(Op, DAG);
4381}
4382
4383SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4384 SelectionDAG &DAG) const {
4385 MachineFunction &MF = DAG.getMachineFunction();
4386 SystemZMachineFunctionInfo *FuncInfo =
4387 MF.getInfo<SystemZMachineFunctionInfo>();
4388
4389 SDLoc DL(Op);
4390
4391 // vastart just stores the address of the VarArgsFrameIndex slot into the
4392 // memory location argument.
4393 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4394 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4395 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4396 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4397 MachinePointerInfo(SV));
4398}
4399
4400SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4401 SelectionDAG &DAG) const {
4402 MachineFunction &MF = DAG.getMachineFunction();
4403 SystemZMachineFunctionInfo *FuncInfo =
4404 MF.getInfo<SystemZMachineFunctionInfo>();
4405 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4406
4407 SDValue Chain = Op.getOperand(0);
4408 SDValue Addr = Op.getOperand(1);
4409 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4410 SDLoc DL(Op);
4411
4412 // The initial values of each field.
4413 const unsigned NumFields = 4;
4414 SDValue Fields[NumFields] = {
4415 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4416 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4417 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4418 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4419 };
4420
4421 // Store each field into its respective slot.
4422 SDValue MemOps[NumFields];
4423 unsigned Offset = 0;
4424 for (unsigned I = 0; I < NumFields; ++I) {
4425 SDValue FieldAddr = Addr;
4426 if (Offset != 0)
4427 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4428 DAG.getIntPtrConstant(Offset, DL));
4429 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4430 MachinePointerInfo(SV, Offset));
4431 Offset += 8;
4432 }
4433 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4434}
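// The four stores above populate the ELF ABI va_list, which is effectively
// (field names here are illustrative):
//   struct __va_list {
//     long  __gpr;               // offset 0:  next GPR argument number
//     long  __fpr;               // offset 8:  next FPR argument number
//     void *__overflow_arg_area; // offset 16: start of stack arguments
//     void *__reg_save_area;     // offset 24: register save area
//   };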
4435
4436SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4437 SelectionDAG &DAG) const {
4438 SDValue Chain = Op.getOperand(0);
4439 SDValue DstPtr = Op.getOperand(1);
4440 SDValue SrcPtr = Op.getOperand(2);
4441 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4442 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4443 SDLoc DL(Op);
4444
4445 uint32_t Sz =
4446 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4447 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4448 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4449 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4450 MachinePointerInfo(SrcSV));
4451}
4452
4453SDValue
4454SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4455 SelectionDAG &DAG) const {
4456 if (Subtarget.isTargetXPLINK64())
4457 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4458 else
4459 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4460}
4461
4462SDValue
4463SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4464 SelectionDAG &DAG) const {
4465 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4466 MachineFunction &MF = DAG.getMachineFunction();
4467 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4468 SDValue Chain = Op.getOperand(0);
4469 SDValue Size = Op.getOperand(1);
4470 SDValue Align = Op.getOperand(2);
4471 SDLoc DL(Op);
4472
4473 // If the user has set the "no-realign-stack" function attribute, ignore
4474 // alloca alignments.
4475 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4476
4477 uint64_t StackAlign = TFI->getStackAlignment();
4478 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4479 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4480
4481 SDValue NeededSpace = Size;
4482
4483 // Add extra space for alignment if needed.
4484 EVT PtrVT = getPointerTy(MF.getDataLayout());
4485 if (ExtraAlignSpace)
4486 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4487 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4488
4489 bool IsSigned = false;
4490 bool DoesNotReturn = false;
4491 bool IsReturnValueUsed = false;
4492 EVT VT = Op.getValueType();
4493 SDValue AllocaCall =
4494 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4495 CallingConv::C, IsSigned, DL, DoesNotReturn,
4496 IsReturnValueUsed)
4497 .first;
4498
4499 // Perform a CopyFromReg from %GPR4 (the stack pointer register). Chain and
4500 // glue this to the end of the call so that it cannot be scheduled apart
4501 // from the call sequence.
4502 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4503 Register SPReg = Regs.getStackPointerRegister();
4504 Chain = AllocaCall.getValue(1);
4505 SDValue Glue = AllocaCall.getValue(2);
4506 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4507 Chain = NewSPRegNode.getValue(1);
4508
4509 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4510 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4511 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4512
4513 // Dynamically realign if needed.
4514 if (ExtraAlignSpace) {
4515 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4516 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4517 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4518 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4519 }
4520
4521 SDValue Ops[2] = {Result, Chain};
4522 return DAG.getMergeValues(Ops, DL);
4523}
4524
4525SDValue
4526SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4527 SelectionDAG &DAG) const {
4528 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4529 MachineFunction &MF = DAG.getMachineFunction();
4530 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4531 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4532
4533 SDValue Chain = Op.getOperand(0);
4534 SDValue Size = Op.getOperand(1);
4535 SDValue Align = Op.getOperand(2);
4536 SDLoc DL(Op);
4537
4538 // If the user has set the "no-realign-stack" function attribute, ignore
4539 // alloca alignments.
4540 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4541
4542 uint64_t StackAlign = TFI->getStackAlignment();
4543 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4544 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4545
4546 Register SPReg = getStackPointerRegisterToSaveRestore();
4547 SDValue NeededSpace = Size;
4548
4549 // Get a reference to the stack pointer.
4550 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4551
4552 // If we need a backchain, save it now.
4553 SDValue Backchain;
4554 if (StoreBackchain)
4555 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4556 MachinePointerInfo());
4557
4558 // Add extra space for alignment if needed.
4559 if (ExtraAlignSpace)
4560 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4561 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4562
4563 // Get the new stack pointer value.
4564 SDValue NewSP;
4565 if (hasInlineStackProbe(MF)) {
4566 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4567 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4568 Chain = NewSP.getValue(1);
4569 }
4570 else {
4571 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4572 // Copy the new stack pointer back.
4573 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4574 }
4575
4576 // The allocated data lives above the 160 bytes allocated for the standard
4577 // frame, plus any outgoing stack arguments. We don't know how much that
4578 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4579 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4580 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4581
4582 // Dynamically realign if needed.
4583 if (RequiredAlign > StackAlign) {
4584 Result =
4585 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4586 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4587 Result =
4588 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4589 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4590 }
4591
4592 if (StoreBackchain)
4593 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4594 MachinePointerInfo());
4595
4596 SDValue Ops[2] = { Result, Chain };
4597 return DAG.getMergeValues(Ops, DL);
4598}
4599
4600SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4601 SDValue Op, SelectionDAG &DAG) const {
4602 SDLoc DL(Op);
4603
4604 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4605}
4606
4607SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4608 SelectionDAG &DAG,
4609 unsigned Opcode) const {
4610 EVT VT = Op.getValueType();
4611 SDLoc DL(Op);
4612 SDValue Even, Odd;
4613
4614 // This custom expander is only used on z17 and later for 64-bit types.
4615 assert(!is32Bit(VT));
4616 assert(Subtarget.hasMiscellaneousExtensions2());
4617
4618 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4619 // the high result in the even register. Return the latter.
4620 lowerGR128Binary(DAG, DL, VT, Opcode,
4621 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4622 return Even;
4623}
4624
4625SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4626 SelectionDAG &DAG) const {
4627 EVT VT = Op.getValueType();
4628 SDLoc DL(Op);
4629 SDValue Ops[2];
4630 if (is32Bit(VT))
4631 // Just do a normal 64-bit multiplication and extract the results.
4632 // We define this so that it can be used for constant division.
4633 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4634 Op.getOperand(1), Ops[1], Ops[0]);
4635 else if (Subtarget.hasMiscellaneousExtensions2())
4636 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4637 // the high result in the even register. ISD::SMUL_LOHI is defined to
4638 // return the low half first, so the results are in reverse order.
4639 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4640 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4641 else {
4642 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4643 //
4644 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4645 //
4646 // but using the fact that the upper halves are either all zeros
4647 // or all ones:
4648 //
4649 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4650 //
4651 // and grouping the two right-hand terms together, since they are quicker to
4652 // compute than the multiplication:
4653 //
4654 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4655 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4656 SDValue LL = Op.getOperand(0);
4657 SDValue RL = Op.getOperand(1);
4658 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4659 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4660 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4661 // the high result in the even register. ISD::SMUL_LOHI is defined to
4662 // return the low half first, so the results are in reverse order.
4663 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4664 LL, RL, Ops[1], Ops[0]);
4665 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4666 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4667 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4668 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4669 }
4670 return DAG.getMergeValues(Ops, DL);
4671}
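// Worked example of the expansion above: with ll = -1 (all ones) and rl = 2,
// UMUL_LOHI computes the unsigned product (2^64 - 1) * 2, whose high half is 1.
// The sign words are lh = -1 and rh = 0, so NegSum = (lh & rl) + (ll & rh) = 2,
// and subtracting it from the high half gives -1, the correct high half of the
// signed 128-bit product -2.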
4672
4673SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4674 SelectionDAG &DAG) const {
4675 EVT VT = Op.getValueType();
4676 SDLoc DL(Op);
4677 SDValue Ops[2];
4678 if (is32Bit(VT))
4679 // Just do a normal 64-bit multiplication and extract the results.
4680 // We define this so that it can be used for constant division.
4681 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4682 Op.getOperand(1), Ops[1], Ops[0]);
4683 else
4684 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4685 // the high result in the even register. ISD::UMUL_LOHI is defined to
4686 // return the low half first, so the results are in reverse order.
4687 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4688 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4689 return DAG.getMergeValues(Ops, DL);
4690}
4691
4692SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4693 SelectionDAG &DAG) const {
4694 SDValue Op0 = Op.getOperand(0);
4695 SDValue Op1 = Op.getOperand(1);
4696 EVT VT = Op.getValueType();
4697 SDLoc DL(Op);
4698
4699 // We use DSGF for 32-bit division. This means the first operand must
4700 // always be 64-bit, and the second operand should be 32-bit whenever
4701 // that is possible, to improve performance.
4702 if (is32Bit(VT))
4703 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4704 else if (DAG.ComputeNumSignBits(Op1) > 32)
4705 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4706
4707 // DSG(F) returns the remainder in the even register and the
4708 // quotient in the odd register.
4709 SDValue Ops[2];
4710 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4711 return DAG.getMergeValues(Ops, DL);
4712}
4713
4714SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4715 SelectionDAG &DAG) const {
4716 EVT VT = Op.getValueType();
4717 SDLoc DL(Op);
4718
4719 // DL(G) returns the remainder in the even register and the
4720 // quotient in the odd register.
4721 SDValue Ops[2];
4722 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4723 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4724 return DAG.getMergeValues(Ops, DL);
4725}
4726
4727SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4728 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4729
4730 // Get the known-zero masks for each operand.
4731 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4732 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4733 DAG.computeKnownBits(Ops[1])};
4734
4735 // See if the upper 32 bits of one operand and the lower 32 bits of the
4736 // other are known zero. They are the low and high operands respectively.
4737 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4738 Known[1].Zero.getZExtValue() };
4739 unsigned High, Low;
4740 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4741 High = 1, Low = 0;
4742 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4743 High = 0, Low = 1;
4744 else
4745 return Op;
4746
4747 SDValue LowOp = Ops[Low];
4748 SDValue HighOp = Ops[High];
4749
4750 // If the high part is a constant, we're better off using IILH.
4751 if (HighOp.getOpcode() == ISD::Constant)
4752 return Op;
4753
4754 // If the low part is a constant that is outside the range of LHI,
4755 // then we're better off using IILF.
4756 if (LowOp.getOpcode() == ISD::Constant) {
4757 int64_t Value = int32_t(LowOp->getAsZExtVal());
4758 if (!isInt<16>(Value))
4759 return Op;
4760 }
4761
4762 // Check whether the high part is an AND that doesn't change the
4763 // high 32 bits and just masks out low bits. We can skip it if so.
4764 if (HighOp.getOpcode() == ISD::AND &&
4765 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4766 SDValue HighOp0 = HighOp.getOperand(0);
4767 uint64_t Mask = HighOp.getConstantOperandVal(1);
4768 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4769 HighOp = HighOp0;
4770 }
4771
4772 // Take advantage of the fact that all GR32 operations only change the
4773 // low 32 bits by truncating Low to an i32 and inserting it directly
4774 // using a subreg. The interesting cases are those where the truncation
4775 // can be folded.
4776 SDLoc DL(Op);
4777 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4778 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4779 MVT::i64, HighOp, Low32);
4780}
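// Example of the combine above: for (or %hi, %lo) where the low 32 bits of
// %hi and the high 32 bits of %lo are known zero, the i64 OR is rewritten as
// an insertion of trunc(%lo) into the low-word subregister of %hi, so it can
// be selected as a simple 32-bit register operation.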
4781
4782// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4783SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4784 SelectionDAG &DAG) const {
4785 SDNode *N = Op.getNode();
4786 SDValue LHS = N->getOperand(0);
4787 SDValue RHS = N->getOperand(1);
4788 SDLoc DL(N);
4789
4790 if (N->getValueType(0) == MVT::i128) {
4791 unsigned BaseOp = 0;
4792 unsigned FlagOp = 0;
4793 bool IsBorrow = false;
4794 switch (Op.getOpcode()) {
4795 default: llvm_unreachable("Unknown instruction!");
4796 case ISD::UADDO:
4797 BaseOp = ISD::ADD;
4798 FlagOp = SystemZISD::VACC;
4799 break;
4800 case ISD::USUBO:
4801 BaseOp = ISD::SUB;
4802 FlagOp = SystemZISD::VSCBI;
4803 IsBorrow = true;
4804 break;
4805 }
4806 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4807 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4808 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4809 DAG.getValueType(MVT::i1));
4810 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4811 if (IsBorrow)
4812 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4813 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4814 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4815 }
4816
4817 unsigned BaseOp = 0;
4818 unsigned CCValid = 0;
4819 unsigned CCMask = 0;
4820
4821 switch (Op.getOpcode()) {
4822 default: llvm_unreachable("Unknown instruction!");
4823 case ISD::SADDO:
4824 BaseOp = SystemZISD::SADDO;
4825 CCValid = SystemZ::CCMASK_ARITH;
4826 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4827 break;
4828 case ISD::SSUBO:
4829 BaseOp = SystemZISD::SSUBO;
4830 CCValid = SystemZ::CCMASK_ARITH;
4831 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4832 break;
4833 case ISD::UADDO:
4834 BaseOp = SystemZISD::UADDO;
4835 CCValid = SystemZ::CCMASK_LOGICAL;
4836 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4837 break;
4838 case ISD::USUBO:
4839 BaseOp = SystemZISD::USUBO;
4840 CCValid = SystemZ::CCMASK_LOGICAL;
4841 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4842 break;
4843 }
4844
4845 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4846 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4847
4848 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4849 if (N->getValueType(1) == MVT::i1)
4850 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4851
4852 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4853}
4854
4855static bool isAddCarryChain(SDValue Carry) {
4856 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4857 Carry->getValueType(0) != MVT::i128)
4858 Carry = Carry.getOperand(2);
4859 return Carry.getOpcode() == ISD::UADDO &&
4860 Carry->getValueType(0) != MVT::i128;
4861}
4862
4863static bool isSubBorrowChain(SDValue Carry) {
4864 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4865 Carry->getValueType(0) != MVT::i128)
4866 Carry = Carry.getOperand(2);
4867 return Carry.getOpcode() == ISD::USUBO &&
4868 Carry->getValueType(0) != MVT::i128;
4869}
4870
4871// Lower UADDO_CARRY/USUBO_CARRY nodes.
4872SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4873 SelectionDAG &DAG) const {
4874
4875 SDNode *N = Op.getNode();
4876 MVT VT = N->getSimpleValueType(0);
4877
4878 // Let legalize expand this if it isn't a legal type yet.
4879 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4880 return SDValue();
4881
4882 SDValue LHS = N->getOperand(0);
4883 SDValue RHS = N->getOperand(1);
4884 SDValue Carry = Op.getOperand(2);
4885 SDLoc DL(N);
4886
4887 if (VT == MVT::i128) {
4888 unsigned BaseOp = 0;
4889 unsigned FlagOp = 0;
4890 bool IsBorrow = false;
4891 switch (Op.getOpcode()) {
4892 default: llvm_unreachable("Unknown instruction!");
4893 case ISD::UADDO_CARRY:
4894 BaseOp = SystemZISD::VAC;
4895 FlagOp = SystemZISD::VACCC;
4896 break;
4897 case ISD::USUBO_CARRY:
4898 BaseOp = SystemZISD::VSBI;
4899 FlagOp = SystemZISD::VSBCBI;
4900 IsBorrow = true;
4901 break;
4902 }
4903 if (IsBorrow)
4904 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4905 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4906 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4907 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4908 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4909 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4910 DAG.getValueType(MVT::i1));
4911 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4912 if (IsBorrow)
4913 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4914 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4915 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4916 }
4917
4918 unsigned BaseOp = 0;
4919 unsigned CCValid = 0;
4920 unsigned CCMask = 0;
4921
4922 switch (Op.getOpcode()) {
4923 default: llvm_unreachable("Unknown instruction!");
4924 case ISD::UADDO_CARRY:
4925 if (!isAddCarryChain(Carry))
4926 return SDValue();
4927
4928 BaseOp = SystemZISD::ADDCARRY;
4929 CCValid = SystemZ::CCMASK_LOGICAL;
4930 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4931 break;
4932 case ISD::USUBO_CARRY:
4933 if (!isSubBorrowChain(Carry))
4934 return SDValue();
4935
4936 BaseOp = SystemZISD::SUBCARRY;
4937 CCValid = SystemZ::CCMASK_LOGICAL;
4938 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4939 break;
4940 }
4941
4942 // Set the condition code from the carry flag.
4943 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4944 DAG.getConstant(CCValid, DL, MVT::i32),
4945 DAG.getConstant(CCMask, DL, MVT::i32));
4946
4947 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4948 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4949
4950 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4951 if (N->getValueType(1) == MVT::i1)
4952 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4953
4954 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4955}
4956
4957SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4958 SelectionDAG &DAG) const {
4959 EVT VT = Op.getValueType();
4960 SDLoc DL(Op);
4961 Op = Op.getOperand(0);
4962
4963 if (VT.getScalarSizeInBits() == 128) {
4964 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4965 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4966 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4967 DAG.getConstant(0, DL, MVT::i64));
4968 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4969 return Op;
4970 }
4971
4972 // Handle vector types via VPOPCT.
4973 if (VT.isVector()) {
4974 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4975 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4976 switch (VT.getScalarSizeInBits()) {
4977 case 8:
4978 break;
4979 case 16: {
4980 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4981 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4982 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4983 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4984 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4985 break;
4986 }
4987 case 32: {
4988 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4989 DAG.getConstant(0, DL, MVT::i32));
4990 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4991 break;
4992 }
4993 case 64: {
4994 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4995 DAG.getConstant(0, DL, MVT::i32));
4996 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4997 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4998 break;
4999 }
5000 default:
5001 llvm_unreachable("Unexpected type");
5002 }
5003 return Op;
5004 }
5005
5006 // Get the known-zero mask for the operand.
5007 KnownBits Known = DAG.computeKnownBits(Op);
5008 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
5009 if (NumSignificantBits == 0)
5010 return DAG.getConstant(0, DL, VT);
5011
5012 // Skip known-zero high parts of the operand.
5013 int64_t OrigBitSize = VT.getSizeInBits();
5014 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5015 BitSize = std::min(BitSize, OrigBitSize);
5016
5017 // The POPCNT instruction counts the number of bits in each byte.
5018 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5019 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5020 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5021
5022 // Add up per-byte counts in a binary tree. All bits of Op at
5023 // position larger than BitSize remain zero throughout.
5024 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5025 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
5026 if (BitSize != OrigBitSize)
5027 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5028 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5029 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5030 }
5031
5032 // Extract overall result from high byte.
5033 if (BitSize > 8)
5034 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5035 DAG.getConstant(BitSize - 8, DL, VT));
5036
5037 return Op;
5038}
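// Worked example of the scalar reduction above for the i32 value 0x01010101:
// POPCNT leaves one per-byte count, 0x01010101.  The loop then adds shifted
// copies, giving 0x02020101 after the I = 16 step and 0x04030201 after the
// I = 8 step; the final SRL by 24 extracts the total population count, 4.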
5039
5040SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5041 SelectionDAG &DAG) const {
5042 SDLoc DL(Op);
5043 AtomicOrdering FenceOrdering =
5044 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5045 SyncScope::ID FenceSSID =
5046 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5047
5048 // The only fence that needs an instruction is a sequentially-consistent
5049 // cross-thread fence.
5050 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5051 FenceSSID == SyncScope::System) {
5052 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5053 Op.getOperand(0)),
5054 0);
5055 }
5056
5057 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5058 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5059}
5060
5061SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5062 SelectionDAG &DAG) const {
5063 EVT RegVT = Op.getValueType();
5064 if (RegVT.getSizeInBits() == 128)
5065 return lowerATOMIC_LDST_I128(Op, DAG);
5066 return lowerLoadF16(Op, DAG);
5067}
5068
5069SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5070 SelectionDAG &DAG) const {
5071 auto *Node = cast<AtomicSDNode>(Op.getNode());
5072 if (Node->getMemoryVT().getSizeInBits() == 128)
5073 return lowerATOMIC_LDST_I128(Op, DAG);
5074 return lowerStoreF16(Op, DAG);
5075}
5076
5077SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5078 SelectionDAG &DAG) const {
5079 auto *Node = cast<AtomicSDNode>(Op.getNode());
5080 assert(
5081 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5082 "Only custom lowering i128 or f128.");
5083 // Use same code to handle both legal and non-legal i128 types.
5084 SmallVector<SDValue, 2> Results;
5085 LowerOperationWrapper(Node, Results, DAG);
5086 return DAG.getMergeValues(Results, SDLoc(Op));
5087}
5088
5089// Prepare for a Compare And Swap for a subword operation. This needs to be
5090// done in memory with 4 bytes at natural alignment.
5091static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5092 SDValue &AlignedAddr, SDValue &BitShift,
5093 SDValue &NegBitShift) {
5094 EVT PtrVT = Addr.getValueType();
5095 EVT WideVT = MVT::i32;
5096
5097 // Get the address of the containing word.
5098 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5099 DAG.getSignedConstant(-4, DL, PtrVT));
5100
5101 // Get the number of bits that the word must be rotated left in order
5102 // to bring the field to the top bits of a GR32.
5103 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5104 DAG.getConstant(3, DL, PtrVT));
5105 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5106
5107 // Get the complementing shift amount, for rotating a field in the top
5108 // bits back to its proper position.
5109 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5110 DAG.getConstant(0, DL, WideVT), BitShift);
5111
5112}
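// As an illustration: for a halfword at byte offset 2 of its aligned word,
// AlignedAddr points at the word, BitShift is congruent to 16 modulo 32
// (rotating the containing word left by 16 bits brings the halfword to the
// top of a GR32), and NegBitShift is the complementary amount that rotates
// it back to its original position.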
5113
5114// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5115// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5116SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5117 SelectionDAG &DAG,
5118 unsigned Opcode) const {
5119 auto *Node = cast<AtomicSDNode>(Op.getNode());
5120
5121 // 32-bit operations need no special handling.
5122 EVT NarrowVT = Node->getMemoryVT();
5123 EVT WideVT = MVT::i32;
5124 if (NarrowVT == WideVT)
5125 return Op;
5126
5127 int64_t BitSize = NarrowVT.getSizeInBits();
5128 SDValue ChainIn = Node->getChain();
5129 SDValue Addr = Node->getBasePtr();
5130 SDValue Src2 = Node->getVal();
5131 MachineMemOperand *MMO = Node->getMemOperand();
5132 SDLoc DL(Node);
5133
5134 // Convert atomic subtracts of constants into additions.
5135 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5136 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5137 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5138 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5139 Src2.getValueType());
5140 }
5141
5142 SDValue AlignedAddr, BitShift, NegBitShift;
5143 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5144
5145 // Extend the source operand to 32 bits and prepare it for the inner loop.
5146 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5147 // operations require the source to be shifted in advance. (This shift
5148 // can be folded if the source is constant.) For AND and NAND, the lower
5149 // bits must be set, while for other opcodes they should be left clear.
5150 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5151 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5152 DAG.getConstant(32 - BitSize, DL, WideVT));
5153 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5154 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5155 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5156 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5157
5158 // Construct the ATOMIC_LOADW_* node.
5159 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5160 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5161 DAG.getConstant(BitSize, DL, WideVT) };
5162 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5163 NarrowVT, MMO);
5164
5165 // Rotate the result of the final CS so that the field is in the lower
5166 // bits of a GR32, then truncate it.
5167 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5168 DAG.getConstant(BitSize, DL, WideVT));
5169 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5170
5171 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5172 return DAG.getMergeValues(RetOps, DL);
5173}
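// For example, an i8 update at byte offset 1 of its containing word has
// BitShift == 8 and BitSize == 8, so the ROTL by BitShift + BitSize == 16
// moves the updated byte from bits 23..16 of the CS result down into
// bits 7..0, ready for truncation to the narrow type.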
5174
5175// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5176// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5177SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5178 SelectionDAG &DAG) const {
5179 auto *Node = cast<AtomicSDNode>(Op.getNode());
5180 EVT MemVT = Node->getMemoryVT();
5181 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5182 // A full-width operation: negate and use LAA(G).
5183 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5184 assert(Subtarget.hasInterlockedAccess1() &&
5185 "Should have been expanded by AtomicExpand pass.");
5186 SDValue Src2 = Node->getVal();
5187 SDLoc DL(Src2);
5188 SDValue NegSrc2 =
5189 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5190 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5191 Node->getChain(), Node->getBasePtr(), NegSrc2,
5192 Node->getMemOperand());
5193 }
5194
5195 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5196}
5197
5198// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5199SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5200 SelectionDAG &DAG) const {
5201 auto *Node = cast<AtomicSDNode>(Op.getNode());
5202 SDValue ChainIn = Node->getOperand(0);
5203 SDValue Addr = Node->getOperand(1);
5204 SDValue CmpVal = Node->getOperand(2);
5205 SDValue SwapVal = Node->getOperand(3);
5206 MachineMemOperand *MMO = Node->getMemOperand();
5207 SDLoc DL(Node);
5208
5209 if (Node->getMemoryVT() == MVT::i128) {
5210 // Use same code to handle both legal and non-legal i128 types.
5211 SmallVector<SDValue, 2> Results;
5212 LowerOperationWrapper(Node, Results, DAG);
5213 return DAG.getMergeValues(Results, DL);
5214 }
5215
5216 // We have native support for 32-bit and 64-bit compare and swap, but we
5217 // still need to expand extracting the "success" result from the CC.
5218 EVT NarrowVT = Node->getMemoryVT();
5219 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5220 if (NarrowVT == WideVT) {
5221 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5222 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5223 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5224 DL, Tys, Ops, NarrowVT, MMO);
5225 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5226 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5227
5228 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5229 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5230 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5231 return SDValue();
5232 }
5233
5234 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5235 // via a fullword ATOMIC_CMP_SWAPW operation.
5236 int64_t BitSize = NarrowVT.getSizeInBits();
5237
5238 SDValue AlignedAddr, BitShift, NegBitShift;
5239 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5240
5241 // Construct the ATOMIC_CMP_SWAPW node.
5242 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5243 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5244 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5245 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5246 VTList, Ops, NarrowVT, MMO);
5247 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5248 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
5249
5250 // emitAtomicCmpSwapW() will zero extend the result (original value).
5251 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5252 DAG.getValueType(NarrowVT));
5253 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5254 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5255 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5256 return SDValue();
5257}
5258
5259MachineMemOperand::Flags
5260SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5261 // Because of how we convert atomic_load and atomic_store to normal loads and
5262 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5263 // since DAGCombine hasn't been updated to account for atomic, but non
5264 // volatile loads. (See D57601)
5265 if (auto *SI = dyn_cast<StoreInst>(&I))
5266 if (SI->isAtomic())
5267 return MachineMemOperand::MOVolatile;
5268 if (auto *LI = dyn_cast<LoadInst>(&I))
5269 if (LI->isAtomic())
5270 return MachineMemOperand::MOVolatile;
5271 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5272 if (AI->isAtomic())
5273 return MachineMemOperand::MOVolatile;
5274 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5275 if (AI->isAtomic())
5276 return MachineMemOperand::MOVolatile;
5277 return MachineMemOperand::MONone;
5278}
5279
5280SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5281 SelectionDAG &DAG) const {
5282 MachineFunction &MF = DAG.getMachineFunction();
5283 auto *Regs = Subtarget.getSpecialRegisters();
5284 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5285 report_fatal_error("Variable-sized stack allocations are not supported "
5286 "in GHC calling convention");
5287 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5288 Regs->getStackPointerRegister(), Op.getValueType());
5289}
5290
5291SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5292 SelectionDAG &DAG) const {
5293 MachineFunction &MF = DAG.getMachineFunction();
5294 auto *Regs = Subtarget.getSpecialRegisters();
5295 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5296
5297 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5298 report_fatal_error("Variable-sized stack allocations are not supported "
5299 "in GHC calling convention");
5300
5301 SDValue Chain = Op.getOperand(0);
5302 SDValue NewSP = Op.getOperand(1);
5303 SDValue Backchain;
5304 SDLoc DL(Op);
5305
5306 if (StoreBackchain) {
5307 SDValue OldSP = DAG.getCopyFromReg(
5308 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5309 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5310 MachinePointerInfo());
5311 }
5312
5313 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5314
5315 if (StoreBackchain)
5316 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5317 MachinePointerInfo());
5318
5319 return Chain;
5320}
5321
5322SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5323 SelectionDAG &DAG) const {
5324 bool IsData = Op.getConstantOperandVal(4);
5325 if (!IsData)
5326 // Just preserve the chain.
5327 return Op.getOperand(0);
5328
5329 SDLoc DL(Op);
5330 bool IsWrite = Op.getConstantOperandVal(2);
5331 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5332 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5333 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5334 Op.getOperand(1)};
5335 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5336 Node->getVTList(), Ops,
5337 Node->getMemoryVT(), Node->getMemOperand());
5338}
5339
5340SDValue
5341SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5342 SelectionDAG &DAG) const {
5343 unsigned Opcode, CCValid;
5344 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5345 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5346 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5347 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5348 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5349 return SDValue();
5350 }
5351
5352 return SDValue();
5353}
5354
5355SDValue
5356SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5357 SelectionDAG &DAG) const {
5358 unsigned Opcode, CCValid;
5359 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5360 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5361 if (Op->getNumValues() == 1)
5362 return getCCResult(DAG, SDValue(Node, 0));
5363 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5364 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5365 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5366 }
5367
5368 unsigned Id = Op.getConstantOperandVal(0);
5369 switch (Id) {
5370 case Intrinsic::thread_pointer:
5371 return lowerThreadPointer(SDLoc(Op), DAG);
5372
5373 case Intrinsic::s390_vpdi:
5374 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5375 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5376
5377 case Intrinsic::s390_vperm:
5378 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5379 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5380
5381 case Intrinsic::s390_vuphb:
5382 case Intrinsic::s390_vuphh:
5383 case Intrinsic::s390_vuphf:
5384 case Intrinsic::s390_vuphg:
5385 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5386 Op.getOperand(1));
5387
5388 case Intrinsic::s390_vuplhb:
5389 case Intrinsic::s390_vuplhh:
5390 case Intrinsic::s390_vuplhf:
5391 case Intrinsic::s390_vuplhg:
5392 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5393 Op.getOperand(1));
5394
5395 case Intrinsic::s390_vuplb:
5396 case Intrinsic::s390_vuplhw:
5397 case Intrinsic::s390_vuplf:
5398 case Intrinsic::s390_vuplg:
5399 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5400 Op.getOperand(1));
5401
5402 case Intrinsic::s390_vupllb:
5403 case Intrinsic::s390_vupllh:
5404 case Intrinsic::s390_vupllf:
5405 case Intrinsic::s390_vupllg:
5406 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5407 Op.getOperand(1));
5408
5409 case Intrinsic::s390_vsumb:
5410 case Intrinsic::s390_vsumh:
5411 case Intrinsic::s390_vsumgh:
5412 case Intrinsic::s390_vsumgf:
5413 case Intrinsic::s390_vsumqf:
5414 case Intrinsic::s390_vsumqg:
5415 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5416 Op.getOperand(1), Op.getOperand(2));
5417
5418 case Intrinsic::s390_vaq:
5419 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5420 Op.getOperand(1), Op.getOperand(2));
5421 case Intrinsic::s390_vaccb:
5422 case Intrinsic::s390_vacch:
5423 case Intrinsic::s390_vaccf:
5424 case Intrinsic::s390_vaccg:
5425 case Intrinsic::s390_vaccq:
5426 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5427 Op.getOperand(1), Op.getOperand(2));
5428 case Intrinsic::s390_vacq:
5429 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5430 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5431 case Intrinsic::s390_vacccq:
5432 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5433 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5434
5435 case Intrinsic::s390_vsq:
5436 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5437 Op.getOperand(1), Op.getOperand(2));
5438 case Intrinsic::s390_vscbib:
5439 case Intrinsic::s390_vscbih:
5440 case Intrinsic::s390_vscbif:
5441 case Intrinsic::s390_vscbig:
5442 case Intrinsic::s390_vscbiq:
5443 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5444 Op.getOperand(1), Op.getOperand(2));
5445 case Intrinsic::s390_vsbiq:
5446 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5447 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5448 case Intrinsic::s390_vsbcbiq:
5449 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5450 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5451
5452 case Intrinsic::s390_vmhb:
5453 case Intrinsic::s390_vmhh:
5454 case Intrinsic::s390_vmhf:
5455 case Intrinsic::s390_vmhg:
5456 case Intrinsic::s390_vmhq:
5457 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5458 Op.getOperand(1), Op.getOperand(2));
5459 case Intrinsic::s390_vmlhb:
5460 case Intrinsic::s390_vmlhh:
5461 case Intrinsic::s390_vmlhf:
5462 case Intrinsic::s390_vmlhg:
5463 case Intrinsic::s390_vmlhq:
5464 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5465 Op.getOperand(1), Op.getOperand(2));
5466
5467 case Intrinsic::s390_vmahb:
5468 case Intrinsic::s390_vmahh:
5469 case Intrinsic::s390_vmahf:
5470 case Intrinsic::s390_vmahg:
5471 case Intrinsic::s390_vmahq:
5472 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5473 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5474 case Intrinsic::s390_vmalhb:
5475 case Intrinsic::s390_vmalhh:
5476 case Intrinsic::s390_vmalhf:
5477 case Intrinsic::s390_vmalhg:
5478 case Intrinsic::s390_vmalhq:
5479 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5480 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5481
5482 case Intrinsic::s390_vmeb:
5483 case Intrinsic::s390_vmeh:
5484 case Intrinsic::s390_vmef:
5485 case Intrinsic::s390_vmeg:
5486 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5487 Op.getOperand(1), Op.getOperand(2));
5488 case Intrinsic::s390_vmleb:
5489 case Intrinsic::s390_vmleh:
5490 case Intrinsic::s390_vmlef:
5491 case Intrinsic::s390_vmleg:
5492 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5493 Op.getOperand(1), Op.getOperand(2));
5494 case Intrinsic::s390_vmob:
5495 case Intrinsic::s390_vmoh:
5496 case Intrinsic::s390_vmof:
5497 case Intrinsic::s390_vmog:
5498 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5499 Op.getOperand(1), Op.getOperand(2));
5500 case Intrinsic::s390_vmlob:
5501 case Intrinsic::s390_vmloh:
5502 case Intrinsic::s390_vmlof:
5503 case Intrinsic::s390_vmlog:
5504 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5505 Op.getOperand(1), Op.getOperand(2));
5506
5507 case Intrinsic::s390_vmaeb:
5508 case Intrinsic::s390_vmaeh:
5509 case Intrinsic::s390_vmaef:
5510 case Intrinsic::s390_vmaeg:
5511 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5512 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5513 Op.getOperand(1), Op.getOperand(2)),
5514 Op.getOperand(3));
5515 case Intrinsic::s390_vmaleb:
5516 case Intrinsic::s390_vmaleh:
5517 case Intrinsic::s390_vmalef:
5518 case Intrinsic::s390_vmaleg:
5519 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5520 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5521 Op.getOperand(1), Op.getOperand(2)),
5522 Op.getOperand(3));
5523 case Intrinsic::s390_vmaob:
5524 case Intrinsic::s390_vmaoh:
5525 case Intrinsic::s390_vmaof:
5526 case Intrinsic::s390_vmaog:
5527 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5528 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5529 Op.getOperand(1), Op.getOperand(2)),
5530 Op.getOperand(3));
5531 case Intrinsic::s390_vmalob:
5532 case Intrinsic::s390_vmaloh:
5533 case Intrinsic::s390_vmalof:
5534 case Intrinsic::s390_vmalog:
5535 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5536 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5537 Op.getOperand(1), Op.getOperand(2)),
5538 Op.getOperand(3));
5539 }
5540
5541 return SDValue();
5542}
5543
5544namespace {
5545// Says that SystemZISD operation Opcode can be used to perform the equivalent
5546// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5547// Operand is the constant third operand, otherwise it is the number of
5548// bytes in each element of the result.
5549struct Permute {
5550 unsigned Opcode;
5551 unsigned Operand;
5552 unsigned char Bytes[SystemZ::VectorBytes];
5553};
5554}
5555
5556static const Permute PermuteForms[] = {
5557 // VMRHG
5558 { SystemZISD::MERGE_HIGH, 8,
5559 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5560 // VMRHF
5561 { SystemZISD::MERGE_HIGH, 4,
5562 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5563 // VMRHH
5564 { SystemZISD::MERGE_HIGH, 2,
5565 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5566 // VMRHB
5567 { SystemZISD::MERGE_HIGH, 1,
5568 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5569 // VMRLG
5570 { SystemZISD::MERGE_LOW, 8,
5571 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5572 // VMRLF
5573 { SystemZISD::MERGE_LOW, 4,
5574 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5575 // VMRLH
5576 { SystemZISD::MERGE_LOW, 2,
5577 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5578 // VMRLB
5579 { SystemZISD::MERGE_LOW, 1,
5580 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5581 // VPKG
5582 { SystemZISD::PACK, 4,
5583 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5584 // VPKF
5585 { SystemZISD::PACK, 2,
5586 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5587 // VPKH
5588 { SystemZISD::PACK, 1,
5589 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5590 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5591 { SystemZISD::PERMUTE_DWORDS, 4,
5592 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5593 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5594 { SystemZISD::PERMUTE_DWORDS, 1,
5595 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5596};
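// As an example of reading this table: the VMRHB entry describes a byte-wise
// merge high, so result byte 2*i comes from byte i of operand 0 (selectors
// 0..7) and result byte 2*i+1 from byte i of operand 1 (selectors 16..23),
// which is exactly what its mask { 0, 16, 1, 17, ... } encodes.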
5597
5598// Called after matching a vector shuffle against a particular pattern.
5599// Both the original shuffle and the pattern have two vector operands.
5600// OpNos[0] is the operand of the original shuffle that should be used for
5601// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5602// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5603// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5604// for operands 0 and 1 of the pattern.
5605static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5606 if (OpNos[0] < 0) {
5607 if (OpNos[1] < 0)
5608 return false;
5609 OpNo0 = OpNo1 = OpNos[1];
5610 } else if (OpNos[1] < 0) {
5611 OpNo0 = OpNo1 = OpNos[0];
5612 } else {
5613 OpNo0 = OpNos[0];
5614 OpNo1 = OpNos[1];
5615 }
5616 return true;
5617}
5618
5619// Bytes is a VPERM-like permute vector, except that -1 is used for
5620// undefined bytes. Return true if the VPERM can be implemented using P.
5621// When returning true set OpNo0 to the VPERM operand that should be
5622// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5623//
5624// For example, if swapping the VPERM operands allows P to match, OpNo0
5625// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5626// operand, but rewriting it to use two duplicated operands allows it to
5627// match P, then OpNo0 and OpNo1 will be the same.
5628static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5629 unsigned &OpNo0, unsigned &OpNo1) {
5630 int OpNos[] = { -1, -1 };
5631 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5632 int Elt = Bytes[I];
5633 if (Elt >= 0) {
5634 // Make sure that the two permute vectors use the same suboperand
5635 // byte number. Only the operand numbers (the high bits) are
5636 // allowed to differ.
5637 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5638 return false;
5639 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5640 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5641 // Make sure that the operand mappings are consistent with previous
5642 // elements.
5643 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5644 return false;
5645 OpNos[ModelOpNo] = RealOpNo;
5646 }
5647 }
5648 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5649}
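// For example, a mask that replicates the high doubleword of operand 1 into
// both halves of the result still matches the VMRHG pattern: every defined
// byte maps to RealOpNo 1, so chooseShuffleOpNos() resolves OpNo0 and OpNo1
// to the same operand and the merge is emitted with that input used twice.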
5650
5651// As above, but search for a matching permute.
5652static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5653 unsigned &OpNo0, unsigned &OpNo1) {
5654 for (auto &P : PermuteForms)
5655 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5656 return &P;
5657 return nullptr;
5658}
5659
5660// Bytes is a VPERM-like permute vector, except that -1 is used for
5661// undefined bytes. This permute is an operand of an outer permute.
5662// See whether redistributing the -1 bytes gives a shuffle that can be
5663// implemented using P. If so, set Transform to a VPERM-like permute vector
5664// that, when applied to the result of P, gives the original permute in Bytes.
5665static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5666 const Permute &P,
5667 SmallVectorImpl<int> &Transform) {
5668 unsigned To = 0;
5669 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5670 int Elt = Bytes[From];
5671 if (Elt < 0)
5672 // Byte number From of the result is undefined.
5673 Transform[From] = -1;
5674 else {
5675 while (P.Bytes[To] != Elt) {
5676 To += 1;
5677 if (To == SystemZ::VectorBytes)
5678 return false;
5679 }
5680 Transform[From] = To;
5681 }
5682 }
5683 return true;
5684}
5685
5686// As above, but search for a matching permute.
5687static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5688 SmallVectorImpl<int> &Transform) {
5689 for (auto &P : PermuteForms)
5690 if (matchDoublePermute(Bytes, P, Transform))
5691 return &P;
5692 return nullptr;
5693}
5694
5695// Convert the mask of the given shuffle op into a byte-level mask,
5696// as if it had type vNi8.
5697static bool getVPermMask(SDValue ShuffleOp,
5698 SmallVectorImpl<int> &Bytes) {
5699 EVT VT = ShuffleOp.getValueType();
5700 unsigned NumElements = VT.getVectorNumElements();
5701 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5702
5703 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5704 Bytes.resize(NumElements * BytesPerElement, -1);
5705 for (unsigned I = 0; I < NumElements; ++I) {
5706 int Index = VSN->getMaskElt(I);
5707 if (Index >= 0)
5708 for (unsigned J = 0; J < BytesPerElement; ++J)
5709 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5710 }
5711 return true;
5712 }
5713 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5714 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5715 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5716 Bytes.resize(NumElements * BytesPerElement, -1);
5717 for (unsigned I = 0; I < NumElements; ++I)
5718 for (unsigned J = 0; J < BytesPerElement; ++J)
5719 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5720 return true;
5721 }
5722 return false;
5723}
5724
5725// Bytes is a VPERM-like permute vector, except that -1 is used for
5726// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5727// the result come from a contiguous sequence of bytes from one input.
5728// Set Base to the selector for the first byte if so.
5729static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5730 unsigned BytesPerElement, int &Base) {
5731 Base = -1;
5732 for (unsigned I = 0; I < BytesPerElement; ++I) {
5733 if (Bytes[Start + I] >= 0) {
5734 unsigned Elem = Bytes[Start + I];
5735 if (Base < 0) {
5736 Base = Elem - I;
5737 // Make sure the bytes would come from one input operand.
5738 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5739 return false;
5740 } else if (unsigned(Base) != Elem - I)
5741 return false;
5742 }
5743 }
5744 return true;
5745}
5746
5747// Bytes is a VPERM-like permute vector, except that -1 is used for
5748// undefined bytes. Return true if it can be performed using VSLDB.
5749// When returning true, set StartIndex to the shift amount and OpNo0
5750// and OpNo1 to the VPERM operands that should be used as the first
5751// and second shift operand respectively.
5752static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5753 unsigned &StartIndex, unsigned &OpNo0,
5754 unsigned &OpNo1) {
5755 int OpNos[] = { -1, -1 };
5756 int Shift = -1;
5757 for (unsigned I = 0; I < 16; ++I) {
5758 int Index = Bytes[I];
5759 if (Index >= 0) {
5760 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5761 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5762 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5763 if (Shift < 0)
5764 Shift = ExpectedShift;
5765 else if (Shift != ExpectedShift)
5766 return false;
5767 // Make sure that the operand mappings are consistent with previous
5768 // elements.
5769 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5770 return false;
5771 OpNos[ModelOpNo] = RealOpNo;
5772 }
5773 }
5774 StartIndex = Shift;
5775 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5776}
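// For example, the byte mask { 4, 5, ..., 15, 16, 17, 18, 19 } has the same
// shift of 4 for every defined byte, so it matches a VSLDB with StartIndex 4
// that takes the last 12 bytes of the first operand followed by the first
// 4 bytes of the second operand.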
5777
5778// Create a node that performs P on operands Op0 and Op1, casting the
5779// operands to the appropriate type. The type of the result is determined by P.
5780static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5781 const Permute &P, SDValue Op0, SDValue Op1) {
5782 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5783 // elements of a PACK are twice as wide as the outputs.
5784 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5785 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5786 P.Operand);
5787 // Cast both operands to the appropriate type.
5788 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5789 SystemZ::VectorBytes / InBytes);
5790 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5791 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5792 SDValue Op;
5793 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5794 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5795 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5796 } else if (P.Opcode == SystemZISD::PACK) {
5797 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5798 SystemZ::VectorBytes / P.Operand);
5799 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5800 } else {
5801 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5802 }
5803 return Op;
5804}
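// For example, a PACK entry with Operand == 2 (VPKF) bitcasts both inputs to
// v4i32 and produces a v8i16 result, while the PERMUTE_DWORDS entries always
// cast to v2i64 and pass P.Operand through as the immediate selector operand.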
5805
5806static bool isZeroVector(SDValue N) {
5807 if (N->getOpcode() == ISD::BITCAST)
5808 N = N->getOperand(0);
5809 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5810 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5811 return Op->getZExtValue() == 0;
5812 return ISD::isBuildVectorAllZeros(N.getNode());
5813}
5814
5815// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5816static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5817 for (unsigned I = 0; I < Num ; I++)
5818 if (isZeroVector(Ops[I]))
5819 return I;
5820 return UINT32_MAX;
5821}
5822
5823// Bytes is a VPERM-like permute vector, except that -1 is used for
5824// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5825// VSLDB or VPERM.
5826static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5827 SDValue *Ops,
5828 const SmallVectorImpl<int> &Bytes) {
5829 for (unsigned I = 0; I < 2; ++I)
5830 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5831
5832 // First see whether VSLDB can be used.
5833 unsigned StartIndex, OpNo0, OpNo1;
5834 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5835 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5836 Ops[OpNo1],
5837 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5838
5839 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5840 // eliminate a zero vector by reusing any zero index in the permute vector.
5841 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5842 if (ZeroVecIdx != UINT32_MAX) {
5843 bool MaskFirst = true;
5844 int ZeroIdx = -1;
5845 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5846 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5847 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5848 if (OpNo == ZeroVecIdx && I == 0) {
5849 // If the first byte is zero, use mask as first operand.
5850 ZeroIdx = 0;
5851 break;
5852 }
5853 if (OpNo != ZeroVecIdx && Byte == 0) {
5854 // If mask contains a zero, use it by placing that vector first.
5855 ZeroIdx = I + SystemZ::VectorBytes;
5856 MaskFirst = false;
5857 break;
5858 }
5859 }
5860 if (ZeroIdx != -1) {
5861 SDValue IndexNodes[SystemZ::VectorBytes];
5862 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5863 if (Bytes[I] >= 0) {
5864 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5865 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5866 if (OpNo == ZeroVecIdx)
5867 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5868 else {
5869 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5870 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5871 }
5872 } else
5873 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5874 }
5875 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5876 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5877 if (MaskFirst)
5878 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5879 Mask);
5880 else
5881 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5882 Mask);
5883 }
5884 }
5885
5886 SDValue IndexNodes[SystemZ::VectorBytes];
5887 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5888 if (Bytes[I] >= 0)
5889 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5890 else
5891 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5892 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5893 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5894 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5895}
5896
5897namespace {
5898// Describes a general N-operand vector shuffle.
5899struct GeneralShuffle {
5900 GeneralShuffle(EVT vt)
5901 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5902 void addUndef();
5903 bool add(SDValue, unsigned);
5904 SDValue getNode(SelectionDAG &, const SDLoc &);
5905 void tryPrepareForUnpack();
5906 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5907 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5908
5909 // The operands of the shuffle.
5910 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5911
5912 // Index I is -1 if byte I of the result is undefined. Otherwise the
5913 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5914 // Bytes[I] / SystemZ::VectorBytes.
5915 SmallVector<int, SystemZ::VectorBytes> Bytes;
5916
5917 // The type of the shuffle result.
5918 EVT VT;
5919
5920 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5921 unsigned UnpackFromEltSize;
5922 // True if the final unpack uses the low half.
5923 bool UnpackLow;
5924};
5925} // namespace
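// In the Bytes encoding used by GeneralShuffle, entry I selects byte
// Bytes[I] % SystemZ::VectorBytes of operand Bytes[I] / SystemZ::VectorBytes;
// a value of 20, for example, means byte 4 of operand 1, while -1 marks an
// undefined result byte.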
5926
5927// Add an extra undefined element to the shuffle.
5928void GeneralShuffle::addUndef() {
5929 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5930 for (unsigned I = 0; I < BytesPerElement; ++I)
5931 Bytes.push_back(-1);
5932}
5933
5934// Add an extra element to the shuffle, taking it from element Elem of Op.
5935// A null Op indicates a vector input whose value will be calculated later;
5936// there is at most one such input per shuffle and it always has the same
5937// type as the result. Aborts and returns false if the source vector elements
5938// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5939// LLVM they become implicitly extended, but this is rare and not optimized.
5940bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5941 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5942
5943 // The source vector can have wider elements than the result,
5944 // either through an explicit TRUNCATE or because of type legalization.
5945 // We want the least significant part.
5946 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5947 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5948
5949 // Return false if the source elements are smaller than their destination
5950 // elements.
5951 if (FromBytesPerElement < BytesPerElement)
5952 return false;
5953
5954 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5955 (FromBytesPerElement - BytesPerElement));
5956
5957 // Look through things like shuffles and bitcasts.
5958 while (Op.getNode()) {
5959 if (Op.getOpcode() == ISD::BITCAST)
5960 Op = Op.getOperand(0);
5961 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5962 // See whether the bytes we need come from a contiguous part of one
5963 // operand.
5964 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5965 if (!getVPermMask(Op, OpBytes))
5966 break;
5967 int NewByte;
5968 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5969 break;
5970 if (NewByte < 0) {
5971 addUndef();
5972 return true;
5973 }
5974 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5975 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5976 } else if (Op.isUndef()) {
5977 addUndef();
5978 return true;
5979 } else
5980 break;
5981 }
5982
5983 // Make sure that the source of the extraction is in Ops.
5984 unsigned OpNo = 0;
5985 for (; OpNo < Ops.size(); ++OpNo)
5986 if (Ops[OpNo] == Op)
5987 break;
5988 if (OpNo == Ops.size())
5989 Ops.push_back(Op);
5990
5991 // Add the element to Bytes.
5992 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5993 for (unsigned I = 0; I < BytesPerElement; ++I)
5994 Bytes.push_back(Base + I);
5995
5996 return true;
5997}
5998
5999// Return SDNodes for the completed shuffle.
6000SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6001 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6002
6003 if (Ops.size() == 0)
6004 return DAG.getUNDEF(VT);
6005
6006 // Use a single unpack if possible as the last operation.
6007 tryPrepareForUnpack();
6008
6009 // Make sure that there are at least two shuffle operands.
6010 if (Ops.size() == 1)
6011 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6012
6013 // Create a tree of shuffles, deferring root node until after the loop.
6014 // Try to redistribute the undefined elements of non-root nodes so that
6015 // the non-root shuffles match something like a pack or merge, then adjust
6016 // the parent node's permute vector to compensate for the new order.
6017 // Among other things, this copes with vectors like <2 x i16> that were
6018 // padded with undefined elements during type legalization.
6019 //
6020 // In the best case this redistribution will lead to the whole tree
6021 // using packs and merges. It should rarely be a loss in other cases.
6022 unsigned Stride = 1;
6023 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6024 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6025 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6026
6027 // Create a mask for just these two operands.
6028 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6029 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6030 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6031 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6032 if (OpNo == I)
6033 NewBytes[J] = Byte;
6034 else if (OpNo == I + Stride)
6035 NewBytes[J] = SystemZ::VectorBytes + Byte;
6036 else
6037 NewBytes[J] = -1;
6038 }
6039 // See if it would be better to reorganize NewMask to avoid using VPERM.
6040 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6041 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6042 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6043 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6044 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6045 if (NewBytes[J] >= 0) {
6046 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6047 "Invalid double permute");
6048 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6049 } else
6050 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6051 }
6052 } else {
6053 // Just use NewBytes on the operands.
6054 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6055 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6056 if (NewBytes[J] >= 0)
6057 Bytes[J] = I * SystemZ::VectorBytes + J;
6058 }
6059 }
6060 }
6061
6062 // Now we just have 2 inputs. Put the second operand in Ops[1].
6063 if (Stride > 1) {
6064 Ops[1] = Ops[Stride];
6065 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6066 if (Bytes[I] >= int(SystemZ::VectorBytes))
6067 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6068 }
6069
6070 // Look for an instruction that can do the permute without resorting
6071 // to VPERM.
6072 unsigned OpNo0, OpNo1;
6073 SDValue Op;
6074 if (unpackWasPrepared() && Ops[1].isUndef())
6075 Op = Ops[0];
6076 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6077 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6078 else
6079 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6080
6081 Op = insertUnpackIfPrepared(DAG, DL, Op);
6082
6083 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6084}
6085
6086#ifndef NDEBUG
6087static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6088 dbgs() << Msg.c_str() << " { ";
6089 for (unsigned I = 0; I < Bytes.size(); I++)
6090 dbgs() << Bytes[I] << " ";
6091 dbgs() << "}\n";
6092}
6093#endif
6094
6095// If the Bytes vector matches an unpack operation, prepare to do the unpack
6096// after all else by removing the zero vector and the effect of the unpack on
6097// Bytes.
6098void GeneralShuffle::tryPrepareForUnpack() {
6099 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6100 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6101 return;
6102
6103 // Only do this if removing the zero vector reduces the depth, otherwise
6104 // the critical path will increase with the final unpack.
6105 if (Ops.size() > 2 &&
6106 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6107 return;
6108
6109 // Find an unpack that would allow removing the zero vector from Ops.
6110 UnpackFromEltSize = 1;
6111 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6112 bool MatchUnpack = true;
6113 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
6114 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6115 unsigned ToEltSize = UnpackFromEltSize * 2;
6116 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6117 if (!IsZextByte)
6118 SrcBytes.push_back(Bytes[Elt]);
6119 if (Bytes[Elt] != -1) {
6120 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6121 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6122 MatchUnpack = false;
6123 break;
6124 }
6125 }
6126 }
6127 if (MatchUnpack) {
6128 if (Ops.size() == 2) {
6129 // Don't use unpack if a single source operand needs rearrangement.
6130 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6131 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6132 if (SrcBytes[i] == -1)
6133 continue;
6134 if (SrcBytes[i] % 16 != int(i))
6135 CanUseUnpackHigh = false;
6136 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6137 CanUseUnpackLow = false;
6138 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6139 UnpackFromEltSize = UINT_MAX;
6140 return;
6141 }
6142 }
6143 if (!CanUseUnpackHigh)
6144 UnpackLow = true;
6145 }
6146 break;
6147 }
6148 }
6149 if (UnpackFromEltSize > 4)
6150 return;
6151
6152 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6153 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6154 << ".\n";
6155 dumpBytes(Bytes, "Original Bytes vector:"););
6156
6157 // Apply the unpack in reverse to the Bytes array.
6158 unsigned B = 0;
6159 if (UnpackLow) {
6160 while (B < SystemZ::VectorBytes / 2)
6161 Bytes[B++] = -1;
6162 }
6163 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6164 Elt += UnpackFromEltSize;
6165 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6166 Bytes[B] = Bytes[Elt];
6167 }
6168 if (!UnpackLow) {
6169 while (B < SystemZ::VectorBytes)
6170 Bytes[B++] = -1;
6171 }
6172
6173 // Remove the zero vector from Ops
6174 Ops.erase(&Ops[ZeroVecOpNo]);
6175 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6176 if (Bytes[I] >= 0) {
6177 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6178 if (OpNo > ZeroVecOpNo)
6179 Bytes[I] -= SystemZ::VectorBytes;
6180 }
6181
6182 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6183 dbgs() << "\n";);
6184}
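// For example, zero-extending bytes V0..V7 of an operand to halfwords is
// described by Bytes == { z, V0, z, V1, ..., z, V7 }, where z selects the
// zero vector. Preparing a byte-to-halfword unpack rewrites this to
// { V0, V1, ..., V7, -1, ..., -1 } and drops the zero vector from Ops;
// insertUnpackIfPrepared() then recreates the interleaved zeroes with a
// logical unpack high.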
6185
6186SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6187 const SDLoc &DL,
6188 SDValue Op) {
6189 if (!unpackWasPrepared())
6190 return Op;
6191 unsigned InBits = UnpackFromEltSize * 8;
6192 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6193 SystemZ::VectorBits / InBits);
6194 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6195 unsigned OutBits = InBits * 2;
6196 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6197 SystemZ::VectorBits / OutBits);
6198 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6199 : SystemZISD::UNPACKL_HIGH,
6200 DL, OutVT, PackedOp);
6201}
6202
6203// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6204static bool isScalarToVector(SDValue Op) {
6205 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6206 if (!Op.getOperand(I).isUndef())
6207 return false;
6208 return true;
6209}
6210
6211// Return a vector of type VT that contains Value in the first element.
6212// The other elements don't matter.
6213static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6214 SDValue Value) {
6215 // If we have a constant, replicate it to all elements and let the
6216 // BUILD_VECTOR lowering take care of it.
6217 if (Value.getOpcode() == ISD::Constant ||
6218 Value.getOpcode() == ISD::ConstantFP) {
6219 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6220 return DAG.getBuildVector(VT, DL, Ops);
6221 }
6222 if (Value.isUndef())
6223 return DAG.getUNDEF(VT);
6224 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6225}
6226
6227// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6228// element 1. Used for cases in which replication is cheap.
6229static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6230 SDValue Op0, SDValue Op1) {
6231 if (Op0.isUndef()) {
6232 if (Op1.isUndef())
6233 return DAG.getUNDEF(VT);
6234 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6235 }
6236 if (Op1.isUndef())
6237 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6238 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6239 buildScalarToVector(DAG, DL, VT, Op0),
6240 buildScalarToVector(DAG, DL, VT, Op1));
6241}
6242
6243// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6244// vector for them.
6245static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6246 SDValue Op1) {
6247 if (Op0.isUndef() && Op1.isUndef())
6248 return DAG.getUNDEF(MVT::v2i64);
6249 // If one of the two inputs is undefined then replicate the other one,
6250 // in order to avoid using another register unnecessarily.
6251 if (Op0.isUndef())
6252 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6253 else if (Op1.isUndef())
6254 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6255 else {
6256 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6257 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6258 }
6259 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6260}
6261
6262// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6263// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6264// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6265// would benefit from this representation and return it if so.
6266static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6267 BuildVectorSDNode *BVN) {
6268 EVT VT = BVN->getValueType(0);
6269 unsigned NumElements = VT.getVectorNumElements();
6270
6271 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6272 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6273 // need a BUILD_VECTOR, add an additional placeholder operand for that
6274 // BUILD_VECTOR and store its operands in ResidueOps.
6275 GeneralShuffle GS(VT);
6276 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6277 bool FoundOne = false;
6278 for (unsigned I = 0; I < NumElements; ++I) {
6279 SDValue Op = BVN->getOperand(I);
6280 if (Op.getOpcode() == ISD::TRUNCATE)
6281 Op = Op.getOperand(0);
6282 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6283 Op.getOperand(1).getOpcode() == ISD::Constant) {
6284 unsigned Elem = Op.getConstantOperandVal(1);
6285 if (!GS.add(Op.getOperand(0), Elem))
6286 return SDValue();
6287 FoundOne = true;
6288 } else if (Op.isUndef()) {
6289 GS.addUndef();
6290 } else {
6291 if (!GS.add(SDValue(), ResidueOps.size()))
6292 return SDValue();
6293 ResidueOps.push_back(BVN->getOperand(I));
6294 }
6295 }
6296
6297 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6298 if (!FoundOne)
6299 return SDValue();
6300
6301 // Create the BUILD_VECTOR for the remaining elements, if any.
6302 if (!ResidueOps.empty()) {
6303 while (ResidueOps.size() < NumElements)
6304 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6305 for (auto &Op : GS.Ops) {
6306 if (!Op.getNode()) {
6307 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6308 break;
6309 }
6310 }
6311 }
6312 return GS.getNode(DAG, SDLoc(BVN));
6313}
6314
6315bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6316 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6317 return true;
6318 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6319 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6320 return true;
6321 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6322 return true;
6323 return false;
6324}
6325
6326// Combine GPR scalar values Elems into a vector of type VT.
6327SDValue
6328SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6329 SmallVectorImpl<SDValue> &Elems) const {
6330 // See whether there is a single replicated value.
6332 unsigned int NumElements = Elems.size();
6333 unsigned int Count = 0;
6334 for (auto Elem : Elems) {
6335 if (!Elem.isUndef()) {
6336 if (!Single.getNode())
6337 Single = Elem;
6338 else if (Elem != Single) {
6339 Single = SDValue();
6340 break;
6341 }
6342 Count += 1;
6343 }
6344 }
6345 // There are three cases here:
6346 //
6347 // - if the only defined element is a loaded one, the best sequence
6348 // is a replicating load.
6349 //
6350 // - otherwise, if the only defined element is an i64 value, we will
6351 // end up with the same VLVGP sequence regardless of whether we short-cut
6352 // for replication or fall through to the later code.
6353 //
6354 // - otherwise, if the only defined element is an i32 or smaller value,
6355 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6356 // This is only a win if the single defined element is used more than once.
6357 // In other cases we're better off using a single VLVGx.
6358 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6359 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6360
6361 // If all elements are loads, use VLREP/VLEs (below).
6362 bool AllLoads = true;
6363 for (auto Elem : Elems)
6364 if (!isVectorElementLoad(Elem)) {
6365 AllLoads = false;
6366 break;
6367 }
6368
6369 // The best way of building a v2i64 from two i64s is to use VLVGP.
6370 if (VT == MVT::v2i64 && !AllLoads)
6371 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6372
6373 // Use a 64-bit merge high to combine two doubles.
6374 if (VT == MVT::v2f64 && !AllLoads)
6375 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6376
6377 // Build v4f32 values directly from the FPRs:
6378 //
6379 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6380 // V V VMRHF
6381 // <ABxx> <CDxx>
6382 // V VMRHG
6383 // <ABCD>
6384 if (VT == MVT::v4f32 && !AllLoads) {
6385 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6386 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6387 // Avoid unnecessary undefs by reusing the other operand.
6388 if (Op01.isUndef())
6389 Op01 = Op23;
6390 else if (Op23.isUndef())
6391 Op23 = Op01;
6392 // Merging identical replications is a no-op.
6393 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6394 return Op01;
6395 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6396 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6397 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6398 DL, MVT::v2i64, Op01, Op23);
6399 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6400 }
6401
6402 // Collect the constant terms.
6403 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6404 SmallVector<bool, 16> Done(NumElements, false);
6405
6406 unsigned NumConstants = 0;
6407 for (unsigned I = 0; I < NumElements; ++I) {
6408 SDValue Elem = Elems[I];
6409 if (Elem.getOpcode() == ISD::Constant ||
6410 Elem.getOpcode() == ISD::ConstantFP) {
6411 NumConstants += 1;
6412 Constants[I] = Elem;
6413 Done[I] = true;
6414 }
6415 }
6416 // If there was at least one constant, fill in the other elements of
6417 // Constants with undefs to get a full vector constant and use that
6418 // as the starting point.
6419 SDValue Result;
6420 SDValue ReplicatedVal;
6421 if (NumConstants > 0) {
6422 for (unsigned I = 0; I < NumElements; ++I)
6423 if (!Constants[I].getNode())
6424 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6425 Result = DAG.getBuildVector(VT, DL, Constants);
6426 } else {
6427 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6428 // avoid a false dependency on any previous contents of the vector
6429 // register.
6430
6431 // Use a VLREP if at least one element is a load. Make sure to replicate
6432 // the load with the most elements having its value.
6433 std::map<const SDNode*, unsigned> UseCounts;
6434 SDNode *LoadMaxUses = nullptr;
6435 for (unsigned I = 0; I < NumElements; ++I)
6436 if (isVectorElementLoad(Elems[I])) {
6437 SDNode *Ld = Elems[I].getNode();
6438 unsigned Count = ++UseCounts[Ld];
6439 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6440 LoadMaxUses = Ld;
6441 }
6442 if (LoadMaxUses != nullptr) {
6443 ReplicatedVal = SDValue(LoadMaxUses, 0);
6444 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6445 } else {
6446 // Try to use VLVGP.
6447 unsigned I1 = NumElements / 2 - 1;
6448 unsigned I2 = NumElements - 1;
6449 bool Def1 = !Elems[I1].isUndef();
6450 bool Def2 = !Elems[I2].isUndef();
6451 if (Def1 || Def2) {
6452 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6453 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6454 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6455 joinDwords(DAG, DL, Elem1, Elem2));
6456 Done[I1] = true;
6457 Done[I2] = true;
6458 } else
6459 Result = DAG.getUNDEF(VT);
6460 }
6461 }
6462
6463 // Use VLVGx to insert the other elements.
6464 for (unsigned I = 0; I < NumElements; ++I)
6465 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6466 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6467 DAG.getConstant(I, DL, MVT::i32));
6468 return Result;
6469}
6470
6471SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6472 SelectionDAG &DAG) const {
6473 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6474 SDLoc DL(Op);
6475 EVT VT = Op.getValueType();
6476
6477 if (BVN->isConstant()) {
6478 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6479 return Op;
6480
6481 // Fall back to loading it from memory.
6482 return SDValue();
6483 }
6484
6485 // See if we should use shuffles to construct the vector from other vectors.
6486 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6487 return Res;
6488
6489 // Detect SCALAR_TO_VECTOR conversions.
6490 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
6491 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6492
6493 // Otherwise use buildVector to build the vector up from GPRs.
6494 unsigned NumElements = Op.getNumOperands();
6495 SmallVector<SDValue, 16> Ops(NumElements);
6496 for (unsigned I = 0; I < NumElements; ++I)
6497 Ops[I] = Op.getOperand(I);
6498 return buildVector(DAG, DL, VT, Ops);
6499}
6500
6501SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6502 SelectionDAG &DAG) const {
6503 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6504 SDLoc DL(Op);
6505 EVT VT = Op.getValueType();
6506 unsigned NumElements = VT.getVectorNumElements();
6507
6508 if (VSN->isSplat()) {
6509 SDValue Op0 = Op.getOperand(0);
6510 unsigned Index = VSN->getSplatIndex();
6511 assert(Index < VT.getVectorNumElements() &&
6512 "Splat index should be defined and in first operand");
6513 // See whether the value we're splatting is directly available as a scalar.
6514 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6515 Op0.getOpcode() == ISD::BUILD_VECTOR)
6516 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6517 // Otherwise keep it as a vector-to-vector operation.
6518 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6519 DAG.getTargetConstant(Index, DL, MVT::i32));
6520 }
6521
6522 GeneralShuffle GS(VT);
6523 for (unsigned I = 0; I < NumElements; ++I) {
6524 int Elt = VSN->getMaskElt(I);
6525 if (Elt < 0)
6526 GS.addUndef();
6527 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6528 unsigned(Elt) % NumElements))
6529 return SDValue();
6530 }
6531 return GS.getNode(DAG, SDLoc(VSN));
6532}
6533
6534SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6535 SelectionDAG &DAG) const {
6536 SDLoc DL(Op);
6537 // Just insert the scalar into element 0 of an undefined vector.
6538 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6539 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6540 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6541}
6542
6543SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6544 SelectionDAG &DAG) const {
6545 // Handle insertions of floating-point values.
6546 SDLoc DL(Op);
6547 SDValue Op0 = Op.getOperand(0);
6548 SDValue Op1 = Op.getOperand(1);
6549 SDValue Op2 = Op.getOperand(2);
6550 EVT VT = Op.getValueType();
6551
6552 // Insertions into constant indices of a v2f64 can be done using VPDI.
6553 // However, if the inserted value is a bitcast or a constant then it's
6554 // better to use GPRs, as below.
6555 if (VT == MVT::v2f64 &&
6556 Op1.getOpcode() != ISD::BITCAST &&
6557 Op1.getOpcode() != ISD::ConstantFP &&
6558 Op2.getOpcode() == ISD::Constant) {
6559 uint64_t Index = Op2->getAsZExtVal();
6560 unsigned Mask = VT.getVectorNumElements() - 1;
6561 if (Index <= Mask)
6562 return Op;
6563 }
6564
6565 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6566 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6567 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6568 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6569 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6570 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6571 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6572}
6573
6574SDValue
6575SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6576 SelectionDAG &DAG) const {
6577 // Handle extractions of floating-point values.
6578 SDLoc DL(Op);
6579 SDValue Op0 = Op.getOperand(0);
6580 SDValue Op1 = Op.getOperand(1);
6581 EVT VT = Op.getValueType();
6582 EVT VecVT = Op0.getValueType();
6583
6584 // Extractions of constant indices can be done directly.
6585 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6586 uint64_t Index = CIndexN->getZExtValue();
6587 unsigned Mask = VecVT.getVectorNumElements() - 1;
6588 if (Index <= Mask)
6589 return Op;
6590 }
6591
6592 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6593 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6594 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6595 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6596 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6597 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6598}
6599
6600SDValue SystemZTargetLowering::
6601lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6602 SDValue PackedOp = Op.getOperand(0);
6603 EVT OutVT = Op.getValueType();
6604 EVT InVT = PackedOp.getValueType();
6605 unsigned ToBits = OutVT.getScalarSizeInBits();
6606 unsigned FromBits = InVT.getScalarSizeInBits();
6607 unsigned StartOffset = 0;
6608
6609 // If the input is a VECTOR_SHUFFLE, there are a number of important
6610 // cases where we can directly implement the sign-extension of the
6611 // original input lanes of the shuffle.
6612 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6613 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6614 ArrayRef<int> ShuffleMask = SVN->getMask();
6615 int OutNumElts = OutVT.getVectorNumElements();
6616
6617 // Recognize the special case where the sign-extension can be done
6618 // by the VSEG instruction. Handled via the default expander.
6619 if (ToBits == 64 && OutNumElts == 2) {
6620 int NumElem = ToBits / FromBits;
6621 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6622 return SDValue();
6623 }
6624
6625 // Recognize the special case where we can fold the shuffle by
6626 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6627 int StartOffsetCandidate = -1;
6628 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6629 if (ShuffleMask[Elt] == -1)
6630 continue;
6631 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6632 if (StartOffsetCandidate == -1)
6633 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6634 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6635 continue;
6636 }
6637 StartOffsetCandidate = -1;
6638 break;
6639 }
6640 if (StartOffsetCandidate != -1) {
6641 StartOffset = StartOffsetCandidate;
6642 PackedOp = PackedOp.getOperand(0);
6643 }
6644 }
6645
6646 do {
6647 FromBits *= 2;
6648 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6649 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6650 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6651 if (StartOffset >= OutNumElts) {
6652 Opcode = SystemZISD::UNPACK_LOW;
6653 StartOffset -= OutNumElts;
6654 }
6655 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6656 } while (FromBits != ToBits);
6657 return PackedOp;
6658}
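// For example, sign-extending v16i8 lanes to v2i64 (FromBits == 8,
// ToBits == 64) with StartOffset == 0 runs the loop above three times,
// widening 8 -> 16 -> 32 -> 64 bits with UNPACK_HIGH at each step; a
// nonzero StartOffset switches individual steps to UNPACK_LOW so that the
// lower source lanes are selected instead.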
6659
6660// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6661SDValue SystemZTargetLowering::
6662lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6663 SDValue PackedOp = Op.getOperand(0);
6664 SDLoc DL(Op);
6665 EVT OutVT = Op.getValueType();
6666 EVT InVT = PackedOp.getValueType();
6667 unsigned InNumElts = InVT.getVectorNumElements();
6668 unsigned OutNumElts = OutVT.getVectorNumElements();
6669 unsigned NumInPerOut = InNumElts / OutNumElts;
6670
6671 SDValue ZeroVec =
6672 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6673
6674 SmallVector<int, 16> Mask(InNumElts);
6675 unsigned ZeroVecElt = InNumElts;
6676 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6677 unsigned MaskElt = PackedElt * NumInPerOut;
6678 unsigned End = MaskElt + NumInPerOut - 1;
6679 for (; MaskElt < End; MaskElt++)
6680 Mask[MaskElt] = ZeroVecElt++;
6681 Mask[MaskElt] = PackedElt;
6682 }
6683 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6684 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6685}
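// For example, zero-extending v4i32 to v2i64 gives NumInPerOut == 2 and
// the shuffle mask {4, 0, 5, 1} above: each 64-bit result lane is a zero
// word taken from ZeroVec followed by the corresponding source word,
// matching the big-endian element order of the target.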
6686
6687SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6688 unsigned ByScalar) const {
6689 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6690 SDValue Op0 = Op.getOperand(0);
6691 SDValue Op1 = Op.getOperand(1);
6692 SDLoc DL(Op);
6693 EVT VT = Op.getValueType();
6694 unsigned ElemBitSize = VT.getScalarSizeInBits();
6695
6696 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6697 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6698 APInt SplatBits, SplatUndef;
6699 unsigned SplatBitSize;
6700 bool HasAnyUndefs;
6701 // Check for constant splats. Use ElemBitSize as the minimum element
6702 // width and reject splats that need wider elements.
6703 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6704 ElemBitSize, true) &&
6705 SplatBitSize == ElemBitSize) {
6706 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6707 DL, MVT::i32);
6708 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6709 }
6710 // Check for variable splats.
6711 BitVector UndefElements;
6712 SDValue Splat = BVN->getSplatValue(&UndefElements);
6713 if (Splat) {
6714 // Since i32 is the smallest legal type, we either need a no-op
6715 // or a truncation.
6716 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6717 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6718 }
6719 }
6720
6721 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6722 // and the shift amount is directly available in a GPR.
6723 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6724 if (VSN->isSplat()) {
6725 SDValue VSNOp0 = VSN->getOperand(0);
6726 unsigned Index = VSN->getSplatIndex();
6727 assert(Index < VT.getVectorNumElements() &&
6728 "Splat index should be defined and in first operand");
6729 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6730 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6731 // Since i32 is the smallest legal type, we either need a no-op
6732 // or a truncation.
6733 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6734 VSNOp0.getOperand(Index));
6735 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6736 }
6737 }
6738 }
6739
6740 // Otherwise just treat the current form as legal.
6741 return Op;
6742}
6743
6744SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6745 SDLoc DL(Op);
6746
6747 // i128 FSHL with a constant amount that is a multiple of 8 can be
6748 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6749 // facility, FSHL with a constant amount less than 8 can be implemented
6750 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6751 // combination of the two.
6752 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6753 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6754 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6755 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6756 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6757 if (ShiftAmt > 120) {
6758 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6759 // SHR_DOUBLE_BIT emits fewer instructions.
6760 SDValue Val =
6761 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6762 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6763 return DAG.getBitcast(MVT::i128, Val);
6764 }
6765 SmallVector<int, 16> Mask(16);
6766 for (unsigned Elt = 0; Elt < 16; Elt++)
6767 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6768 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6769 if ((ShiftAmt & 7) == 0)
6770 return DAG.getBitcast(MVT::i128, Shuf1);
6771 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6772 SDValue Val =
6773 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6774 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6775 return DAG.getBitcast(MVT::i128, Val);
6776 }
6777 }
6778
6779 return SDValue();
6780}
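// For example, with a constant shift amount of 24 the mask above selects
// bytes 3..18 of the Op0:Op1 byte concatenation, which is exactly
// (Op0 << 24) | (Op1 >> 104), i.e. FSHL by 24; the remaining 1..7 bits of
// a non-multiple-of-8 amount are then folded in via SHL_DOUBLE_BIT.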
6781
6782SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6783 SDLoc DL(Op);
6784
6785 // i128 FSHR with a constant amount that is a multiple of 8 can be
6786 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6787 // facility, FSHR with a constant amount less than 8 can be implemented
6788 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6789 // combination of the two.
6790 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6791 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6792 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6793 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6794 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6795 if (ShiftAmt > 120) {
6796 // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
6797 // SHL_DOUBLE_BIT emits fewer instructions.
6798 SDValue Val =
6799 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6800 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6801 return DAG.getBitcast(MVT::i128, Val);
6802 }
6803 SmallVector<int, 16> Mask(16);
6804 for (unsigned Elt = 0; Elt < 16; Elt++)
6805 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6806 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6807 if ((ShiftAmt & 7) == 0)
6808 return DAG.getBitcast(MVT::i128, Shuf1);
6809 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6810 SDValue Val =
6811 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6812 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6813 return DAG.getBitcast(MVT::i128, Val);
6814 }
6815 }
6816
6817 return SDValue();
6818}
6819
6820 static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) {
6821 SDLoc DL(Op);
6822 SDValue Src = Op.getOperand(0);
6823 MVT DstVT = Op.getSimpleValueType();
6824
6825 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op);
6826 unsigned SrcAS = N->getSrcAddressSpace();
6827
6828 assert(SrcAS != N->getDestAddressSpace() &&
6829 "addrspacecast must be between different address spaces");
6830
6831 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6832 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6833 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6834 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6835 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6836 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6837 } else if (DstVT == MVT::i32) {
6838 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6839 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6840 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6841 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6842 } else {
6843 report_fatal_error("Bad address space in addrspacecast");
6844 }
6845 return Op;
6846}
6847
6848SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6849 SelectionDAG &DAG) const {
6850 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6851 if (In.getSimpleValueType() != MVT::f16)
6852 return Op; // Legal
6853 return SDValue(); // Let legalizer emit the libcall.
6854}
6855
6856 SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
6857 MVT VT, SDValue Arg, SDLoc DL,
6858 SDValue Chain, bool IsStrict) const {
6859 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6860 MakeLibCallOptions CallOptions;
6861 SDValue Result;
6862 std::tie(Result, Chain) =
6863 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6864 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6865}
6866
6867SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6868 SelectionDAG &DAG) const {
6869 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
6870 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
6871 bool IsStrict = Op->isStrictFPOpcode();
6872 SDLoc DL(Op);
6873 MVT VT = Op.getSimpleValueType();
6874 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6875 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6876 EVT InVT = InOp.getValueType();
6877
6878 // FP to unsigned is not directly supported on z10. Promoting an i32
6879 // result to (signed) i64 doesn't generate an inexact condition (fp
6880 // exception) for values that are outside the i32 range but in the i64
6881 // range, so use the default expansion.
6882 if (!Subtarget.hasFPExtension() && !IsSigned)
6883 // Expand i32/i64. F16 values will be recognized to fit and extended.
6884 return SDValue();
6885
6886 // Conversion from f16 is done via f32.
6887 if (InOp.getSimpleValueType() == MVT::f16) {
6888 SmallVector<SDValue, 1> Results;
6889 LowerOperationWrapper(Op.getNode(), Results, DAG);
6890 return DAG.getMergeValues(Results, DL);
6891 }
6892
6893 if (VT == MVT::i128) {
6894 RTLIB::Libcall LC =
6895 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
6896 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6897 }
6898
6899 return Op; // Legal
6900}
6901
6902SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
6903 SelectionDAG &DAG) const {
6904 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
6905 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
6906 bool IsStrict = Op->isStrictFPOpcode();
6907 SDLoc DL(Op);
6908 MVT VT = Op.getSimpleValueType();
6909 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6910 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6911 EVT InVT = InOp.getValueType();
6912
6913 // Conversion to f16 is done via f32.
6914 if (VT == MVT::f16) {
6915 SmallVector<SDValue, 1> Results;
6916 LowerOperationWrapper(Op.getNode(), Results, DAG);
6917 return DAG.getMergeValues(Results, DL);
6918 }
6919
6920 // Unsigned to fp is not directly supported on z10.
6921 if (!Subtarget.hasFPExtension() && !IsSigned)
6922 return SDValue(); // Expand i64.
6923
6924 if (InVT == MVT::i128) {
6925 RTLIB::Libcall LC =
6926 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
6927 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6928 }
6929
6930 return Op; // Legal
6931}
6932
6933// Shift the lower 2 bytes of Op to the left in order to insert into the
6934// upper 2 bytes of the FP register.
6935 static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
6936 assert(Op.getSimpleValueType() == MVT::i64 &&
6937 "Expected to convert i64 to f16.");
6938 SDLoc DL(Op);
6939 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6940 DAG.getConstant(48, DL, MVT::i64));
6941 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6942 SDValue F16Val =
6943 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6944 return F16Val;
6945}
6946
6947// Extract Op into GPR and shift the 2 f16 bytes to the right.
6948 static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
6949 assert(Op.getSimpleValueType() == MVT::f16 &&
6950 "Expected to convert f16 to i64.");
6951 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6952 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6953 SDValue(U32, 0), Op);
6954 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6955 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6956 DAG.getConstant(48, DL, MVT::i32));
6957 return Shft;
6958}
6959
6960// Lower an f16 LOAD in case of no vector support.
6961SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
6962 SelectionDAG &DAG) const {
6963 EVT RegVT = Op.getValueType();
6964 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
6965 (void)RegVT;
6966
6967 // Load as integer.
6968 SDLoc DL(Op);
6969 SDValue NewLd;
6970 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6971 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
6972 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
6973 AtomicLd->getChain(), AtomicLd->getBasePtr(),
6974 AtomicLd->getMemOperand());
6975 } else {
6976 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6977 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
6978 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
6979 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
6980 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6981 }
6982 SDValue F16Val = convertToF16(NewLd, DAG);
6983 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
6984}
6985
6986// Lower an f16 STORE in case of no vector support.
6987SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
6988 SelectionDAG &DAG) const {
6989 SDLoc DL(Op);
6990 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
6991
6992 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
6993 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
6994 Shft, AtomicSt->getBasePtr(),
6995 AtomicSt->getMemOperand());
6996
6997 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
6998 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
6999 St->getMemOperand());
7000}
7001
7002SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7003 SelectionDAG &DAG) const {
7004 SDLoc DL(Op);
7005 MVT ResultVT = Op.getSimpleValueType();
7006 SDValue Arg = Op.getOperand(0);
7007 unsigned Check = Op.getConstantOperandVal(1);
7008
7009 unsigned TDCMask = 0;
7010 if (Check & fcSNan)
7011 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
7012 if (Check & fcQNan)
7013 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
7014 if (Check & fcPosInf)
7015 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
7016 if (Check & fcNegInf)
7017 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
7018 if (Check & fcPosNormal)
7019 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
7020 if (Check & fcNegNormal)
7021 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
7022 if (Check & fcPosSubnormal)
7023 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
7024 if (Check & fcNegSubnormal)
7025 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
7026 if (Check & fcPosZero)
7027 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7028 if (Check & fcNegZero)
7029 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7030 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7031
7032 if (Arg.getSimpleValueType() == MVT::f16)
7033 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7034 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7035 return getCCResult(DAG, Intr);
7036}
7037
7038SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7039 SelectionDAG &DAG) const {
7040 SDLoc DL(Op);
7041 SDValue Chain = Op.getOperand(0);
7042
7043 // STCKF only supports a memory operand, so we have to use a temporary.
7044 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7045 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7046 MachinePointerInfo MPI =
7047 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
7048
7049 // Use STCFK to store the TOD clock into the temporary.
7050 SDValue StoreOps[] = {Chain, StackPtr};
7051 Chain = DAG.getMemIntrinsicNode(
7052 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7053 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7054
7055 // And read it back from there.
7056 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7057}
7058
7059 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
7060 SelectionDAG &DAG) const {
7061 switch (Op.getOpcode()) {
7062 case ISD::FRAMEADDR:
7063 return lowerFRAMEADDR(Op, DAG);
7064 case ISD::RETURNADDR:
7065 return lowerRETURNADDR(Op, DAG);
7066 case ISD::BR_CC:
7067 return lowerBR_CC(Op, DAG);
7068 case ISD::SELECT_CC:
7069 return lowerSELECT_CC(Op, DAG);
7070 case ISD::SETCC:
7071 return lowerSETCC(Op, DAG);
7072 case ISD::STRICT_FSETCC:
7073 return lowerSTRICT_FSETCC(Op, DAG, false);
7074 case ISD::STRICT_FSETCCS:
7075 return lowerSTRICT_FSETCC(Op, DAG, true);
7076 case ISD::GlobalAddress:
7077 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7078 case ISD::GlobalTLSAddress:
7079 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7080 case ISD::BlockAddress:
7081 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7082 case ISD::JumpTable:
7083 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7084 case ISD::ConstantPool:
7085 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7086 case ISD::BITCAST:
7087 return lowerBITCAST(Op, DAG);
7088 case ISD::VASTART:
7089 return lowerVASTART(Op, DAG);
7090 case ISD::VACOPY:
7091 return lowerVACOPY(Op, DAG);
7092 case ISD::DYNAMIC_STACKALLOC:
7093 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7094 case ISD::GET_DYNAMIC_AREA_OFFSET:
7095 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7096 case ISD::MULHS:
7097 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7098 case ISD::MULHU:
7099 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7100 case ISD::SMUL_LOHI:
7101 return lowerSMUL_LOHI(Op, DAG);
7102 case ISD::UMUL_LOHI:
7103 return lowerUMUL_LOHI(Op, DAG);
7104 case ISD::SDIVREM:
7105 return lowerSDIVREM(Op, DAG);
7106 case ISD::UDIVREM:
7107 return lowerUDIVREM(Op, DAG);
7108 case ISD::SADDO:
7109 case ISD::SSUBO:
7110 case ISD::UADDO:
7111 case ISD::USUBO:
7112 return lowerXALUO(Op, DAG);
7113 case ISD::UADDO_CARRY:
7114 case ISD::USUBO_CARRY:
7115 return lowerUADDSUBO_CARRY(Op, DAG);
7116 case ISD::OR:
7117 return lowerOR(Op, DAG);
7118 case ISD::CTPOP:
7119 return lowerCTPOP(Op, DAG);
7120 case ISD::VECREDUCE_ADD:
7121 return lowerVECREDUCE_ADD(Op, DAG);
7122 case ISD::ATOMIC_FENCE:
7123 return lowerATOMIC_FENCE(Op, DAG);
7124 case ISD::ATOMIC_SWAP:
7125 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7126 case ISD::ATOMIC_STORE:
7127 return lowerATOMIC_STORE(Op, DAG);
7128 case ISD::ATOMIC_LOAD:
7129 return lowerATOMIC_LOAD(Op, DAG);
7130 case ISD::ATOMIC_LOAD_ADD:
7131 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7132 case ISD::ATOMIC_LOAD_SUB:
7133 return lowerATOMIC_LOAD_SUB(Op, DAG);
7134 case ISD::ATOMIC_LOAD_AND:
7135 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7136 case ISD::ATOMIC_LOAD_OR:
7137 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7138 case ISD::ATOMIC_LOAD_XOR:
7139 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7140 case ISD::ATOMIC_LOAD_NAND:
7141 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7142 case ISD::ATOMIC_LOAD_MIN:
7143 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7144 case ISD::ATOMIC_LOAD_MAX:
7145 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7146 case ISD::ATOMIC_LOAD_UMIN:
7147 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7148 case ISD::ATOMIC_LOAD_UMAX:
7149 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7150 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7151 return lowerATOMIC_CMP_SWAP(Op, DAG);
7152 case ISD::STACKSAVE:
7153 return lowerSTACKSAVE(Op, DAG);
7154 case ISD::STACKRESTORE:
7155 return lowerSTACKRESTORE(Op, DAG);
7156 case ISD::PREFETCH:
7157 return lowerPREFETCH(Op, DAG);
7158 case ISD::INTRINSIC_W_CHAIN:
7159 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7160 case ISD::INTRINSIC_WO_CHAIN:
7161 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7162 case ISD::BUILD_VECTOR:
7163 return lowerBUILD_VECTOR(Op, DAG);
7164 case ISD::VECTOR_SHUFFLE:
7165 return lowerVECTOR_SHUFFLE(Op, DAG);
7166 case ISD::SCALAR_TO_VECTOR:
7167 return lowerSCALAR_TO_VECTOR(Op, DAG);
7168 case ISD::INSERT_VECTOR_ELT:
7169 return lowerINSERT_VECTOR_ELT(Op, DAG);
7170 case ISD::EXTRACT_VECTOR_ELT:
7171 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7172 case ISD::SIGN_EXTEND_VECTOR_INREG:
7173 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7174 case ISD::ZERO_EXTEND_VECTOR_INREG:
7175 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7176 case ISD::SHL:
7177 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7178 case ISD::SRL:
7179 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7180 case ISD::SRA:
7181 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7182 case ISD::ADDRSPACECAST:
7183 return lowerAddrSpaceCast(Op, DAG);
7184 case ISD::ROTL:
7185 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7186 case ISD::FSHL:
7187 return lowerFSHL(Op, DAG);
7188 case ISD::FSHR:
7189 return lowerFSHR(Op, DAG);
7190 case ISD::FP_EXTEND:
7191 case ISD::STRICT_FP_EXTEND:
7192 return lowerFP_EXTEND(Op, DAG);
7193 case ISD::FP_TO_UINT:
7194 case ISD::FP_TO_SINT:
7195 case ISD::STRICT_FP_TO_UINT:
7196 case ISD::STRICT_FP_TO_SINT:
7197 return lower_FP_TO_INT(Op, DAG);
7198 case ISD::UINT_TO_FP:
7199 case ISD::SINT_TO_FP:
7200 case ISD::STRICT_UINT_TO_FP:
7201 case ISD::STRICT_SINT_TO_FP:
7202 return lower_INT_TO_FP(Op, DAG);
7203 case ISD::LOAD:
7204 return lowerLoadF16(Op, DAG);
7205 case ISD::STORE:
7206 return lowerStoreF16(Op, DAG);
7207 case ISD::IS_FPCLASS:
7208 return lowerIS_FPCLASS(Op, DAG);
7209 case ISD::GET_ROUNDING:
7210 return lowerGET_ROUNDING(Op, DAG);
7211 case ISD::READCYCLECOUNTER:
7212 return lowerREADCYCLECOUNTER(Op, DAG);
7213 case ISD::EH_SJLJ_SETJMP:
7214 case ISD::EH_SJLJ_LONGJMP:
7215 // These operations are legal on our platform, but we cannot actually
7216 // set the operation action to Legal as common code would treat this
7217 // as equivalent to Expand. Instead, we keep the operation action to
7218 // Custom and just leave them unchanged here.
7219 return Op;
7220
7221 default:
7222 llvm_unreachable("Unexpected node to lower");
7223 }
7224}
7225
7226 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
7227 const SDLoc &SL) {
7228 // If i128 is legal, just use a normal bitcast.
7229 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7230 return DAG.getBitcast(MVT::f128, Src);
7231
7232 // Otherwise, f128 must live in FP128, so do a partwise move.
7233 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7234 &SystemZ::FP128BitRegClass);
7235
7236 SDValue Hi, Lo;
7237 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7238
7239 Hi = DAG.getBitcast(MVT::f64, Hi);
7240 Lo = DAG.getBitcast(MVT::f64, Lo);
7241
7242 SDNode *Pair = DAG.getMachineNode(
7243 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7244 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7245 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7246 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7247 return SDValue(Pair, 0);
7248}
7249
7250 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
7251 const SDLoc &SL) {
7252 // If i128 is legal, just use a normal bitcast.
7253 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7254 return DAG.getBitcast(MVT::i128, Src);
7255
7256 // Otherwise, f128 must live in FP128, so do a partwise move.
7257 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7258 &SystemZ::FP128BitRegClass);
7259
7260 SDValue LoFP =
7261 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7262 SDValue HiFP =
7263 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7264 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7265 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7266
7267 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7268}
7269
7270// Lower operations with invalid operand or result types.
7271void
7272 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
7273 SmallVectorImpl<SDValue> &Results,
7274 SelectionDAG &DAG) const {
7275 switch (N->getOpcode()) {
7276 case ISD::ATOMIC_LOAD: {
7277 SDLoc DL(N);
7278 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7279 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7280 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7281 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7282 DL, Tys, Ops, MVT::i128, MMO);
7283
7284 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7285 if (N->getValueType(0) == MVT::f128)
7286 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7287 Results.push_back(Lowered);
7288 Results.push_back(Res.getValue(1));
7289 break;
7290 }
7291 case ISD::ATOMIC_STORE: {
7292 SDLoc DL(N);
7293 SDVTList Tys = DAG.getVTList(MVT::Other);
7294 SDValue Val = N->getOperand(1);
7295 if (Val.getValueType() == MVT::f128)
7296 Val = expandBitCastF128ToI128(DAG, Val, DL);
7297 Val = lowerI128ToGR128(DAG, Val);
7298
7299 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7300 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7301 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7302 DL, Tys, Ops, MVT::i128, MMO);
7303 // We have to enforce sequential consistency by performing a
7304 // serialization operation after the store.
7305 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7306 AtomicOrdering::SequentiallyConsistent)
7307 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7308 MVT::Other, Res), 0);
7309 Results.push_back(Res);
7310 break;
7311 }
7312 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7313 SDLoc DL(N);
7314 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7315 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7316 lowerI128ToGR128(DAG, N->getOperand(2)),
7317 lowerI128ToGR128(DAG, N->getOperand(3)) };
7318 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7319 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7320 DL, Tys, Ops, MVT::i128, MMO);
7321 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7322 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
7323 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7324 Results.push_back(lowerGR128ToI128(DAG, Res));
7325 Results.push_back(Success);
7326 Results.push_back(Res.getValue(2));
7327 break;
7328 }
7329 case ISD::BITCAST: {
7330 if (useSoftFloat())
7331 return;
7332 SDLoc DL(N);
7333 SDValue Src = N->getOperand(0);
7334 EVT SrcVT = Src.getValueType();
7335 EVT ResVT = N->getValueType(0);
7336 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7337 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7338 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7339 if (Subtarget.hasVector()) {
7340 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7341 Results.push_back(SDValue(
7342 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7343 } else {
7344 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7345 Results.push_back(convertToF16(In64, DAG));
7346 }
7347 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7348 SDValue ExtractedVal =
7349 Subtarget.hasVector()
7350 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7351 0)
7352 : convertFromF16(Src, DL, DAG);
7353 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7354 }
7355 break;
7356 }
7357 case ISD::UINT_TO_FP:
7358 case ISD::SINT_TO_FP:
7359 case ISD::STRICT_UINT_TO_FP:
7360 case ISD::STRICT_SINT_TO_FP: {
7361 if (useSoftFloat())
7362 return;
7363 bool IsStrict = N->isStrictFPOpcode();
7364 SDLoc DL(N);
7365 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7366 EVT ResVT = N->getValueType(0);
7367 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7368 if (ResVT == MVT::f16) {
7369 if (!IsStrict) {
7370 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7371 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7372 } else {
7373 SDValue OpF32 =
7374 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7375 {Chain, InOp});
7376 SDValue F16Res;
7377 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7378 OpF32, OpF32.getValue(1), DL, MVT::f16);
7379 Results.push_back(F16Res);
7380 Results.push_back(Chain);
7381 }
7382 }
7383 break;
7384 }
7385 case ISD::FP_TO_UINT:
7386 case ISD::FP_TO_SINT:
7387 case ISD::STRICT_FP_TO_UINT:
7388 case ISD::STRICT_FP_TO_SINT: {
7389 if (useSoftFloat())
7390 return;
7391 bool IsStrict = N->isStrictFPOpcode();
7392 SDLoc DL(N);
7393 EVT ResVT = N->getValueType(0);
7394 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7395 EVT InVT = InOp->getValueType(0);
7396 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7397 if (InVT == MVT::f16) {
7398 if (!IsStrict) {
7399 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7400 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7401 } else {
7402 SDValue InF32;
7403 std::tie(InF32, Chain) =
7404 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7405 SDValue OpF32 =
7406 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7407 {Chain, InF32});
7408 Results.push_back(OpF32);
7409 Results.push_back(OpF32.getValue(1));
7410 }
7411 }
7412 break;
7413 }
7414 default:
7415 llvm_unreachable("Unexpected node to lower");
7416 }
7417}
7418
7419void
7420 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
7421 SmallVectorImpl<SDValue> &Results,
7422 SelectionDAG &DAG) const {
7423 return LowerOperationWrapper(N, Results, DAG);
7424 }
7425
7426const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
7427#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
7428 switch ((SystemZISD::NodeType)Opcode) {
7429 case SystemZISD::FIRST_NUMBER: break;
7430 OPCODE(RET_GLUE);
7431 OPCODE(CALL);
7432 OPCODE(SIBCALL);
7433 OPCODE(TLS_GDCALL);
7434 OPCODE(TLS_LDCALL);
7435 OPCODE(PCREL_WRAPPER);
7436 OPCODE(PCREL_OFFSET);
7437 OPCODE(ICMP);
7438 OPCODE(FCMP);
7439 OPCODE(STRICT_FCMP);
7440 OPCODE(STRICT_FCMPS);
7441 OPCODE(TM);
7442 OPCODE(BR_CCMASK);
7443 OPCODE(SELECT_CCMASK);
7444 OPCODE(ADJDYNALLOC);
7445 OPCODE(PROBED_ALLOCA);
7446 OPCODE(POPCNT);
7447 OPCODE(SMUL_LOHI);
7448 OPCODE(UMUL_LOHI);
7449 OPCODE(SDIVREM);
7450 OPCODE(UDIVREM);
7451 OPCODE(SADDO);
7452 OPCODE(SSUBO);
7453 OPCODE(UADDO);
7454 OPCODE(USUBO);
7455 OPCODE(ADDCARRY);
7456 OPCODE(SUBCARRY);
7457 OPCODE(GET_CCMASK);
7458 OPCODE(MVC);
7459 OPCODE(NC);
7460 OPCODE(OC);
7461 OPCODE(XC);
7462 OPCODE(CLC);
7463 OPCODE(MEMSET_MVC);
7464 OPCODE(STPCPY);
7465 OPCODE(STRCMP);
7466 OPCODE(SEARCH_STRING);
7467 OPCODE(IPM);
7468 OPCODE(TBEGIN);
7469 OPCODE(TBEGIN_NOFLOAT);
7470 OPCODE(TEND);
7471 OPCODE(BYTE_MASK);
7472 OPCODE(ROTATE_MASK);
7473 OPCODE(REPLICATE);
7474 OPCODE(JOIN_DWORDS);
7475 OPCODE(SPLAT);
7476 OPCODE(MERGE_HIGH);
7477 OPCODE(MERGE_LOW);
7478 OPCODE(SHL_DOUBLE);
7479 OPCODE(PERMUTE_DWORDS);
7480 OPCODE(PERMUTE);
7481 OPCODE(PACK);
7482 OPCODE(PACKS_CC);
7483 OPCODE(PACKLS_CC);
7484 OPCODE(UNPACK_HIGH);
7485 OPCODE(UNPACKL_HIGH);
7486 OPCODE(UNPACK_LOW);
7487 OPCODE(UNPACKL_LOW);
7488 OPCODE(VSHL_BY_SCALAR);
7489 OPCODE(VSRL_BY_SCALAR);
7490 OPCODE(VSRA_BY_SCALAR);
7491 OPCODE(VROTL_BY_SCALAR);
7492 OPCODE(SHL_DOUBLE_BIT);
7493 OPCODE(SHR_DOUBLE_BIT);
7494 OPCODE(VSUM);
7495 OPCODE(VACC);
7496 OPCODE(VSCBI);
7497 OPCODE(VAC);
7498 OPCODE(VSBI);
7499 OPCODE(VACCC);
7500 OPCODE(VSBCBI);
7501 OPCODE(VMAH);
7502 OPCODE(VMALH);
7503 OPCODE(VME);
7504 OPCODE(VMLE);
7505 OPCODE(VMO);
7506 OPCODE(VMLO);
7507 OPCODE(VICMPE);
7508 OPCODE(VICMPH);
7509 OPCODE(VICMPHL);
7510 OPCODE(VICMPES);
7511 OPCODE(VICMPHS);
7512 OPCODE(VICMPHLS);
7513 OPCODE(VFCMPE);
7514 OPCODE(STRICT_VFCMPE);
7515 OPCODE(STRICT_VFCMPES);
7516 OPCODE(VFCMPH);
7517 OPCODE(STRICT_VFCMPH);
7518 OPCODE(STRICT_VFCMPHS);
7519 OPCODE(VFCMPHE);
7520 OPCODE(STRICT_VFCMPHE);
7521 OPCODE(STRICT_VFCMPHES);
7522 OPCODE(VFCMPES);
7523 OPCODE(VFCMPHS);
7524 OPCODE(VFCMPHES);
7525 OPCODE(VFTCI);
7526 OPCODE(VEXTEND);
7527 OPCODE(STRICT_VEXTEND);
7528 OPCODE(VROUND);
7529 OPCODE(STRICT_VROUND);
7530 OPCODE(VTM);
7531 OPCODE(SCMP128HI);
7532 OPCODE(UCMP128HI);
7533 OPCODE(VFAE_CC);
7534 OPCODE(VFAEZ_CC);
7535 OPCODE(VFEE_CC);
7536 OPCODE(VFEEZ_CC);
7537 OPCODE(VFENE_CC);
7538 OPCODE(VFENEZ_CC);
7539 OPCODE(VISTR_CC);
7540 OPCODE(VSTRC_CC);
7541 OPCODE(VSTRCZ_CC);
7542 OPCODE(VSTRS_CC);
7543 OPCODE(VSTRSZ_CC);
7544 OPCODE(TDC);
7545 OPCODE(ATOMIC_SWAPW);
7546 OPCODE(ATOMIC_LOADW_ADD);
7547 OPCODE(ATOMIC_LOADW_SUB);
7548 OPCODE(ATOMIC_LOADW_AND);
7549 OPCODE(ATOMIC_LOADW_OR);
7550 OPCODE(ATOMIC_LOADW_XOR);
7551 OPCODE(ATOMIC_LOADW_NAND);
7552 OPCODE(ATOMIC_LOADW_MIN);
7553 OPCODE(ATOMIC_LOADW_MAX);
7554 OPCODE(ATOMIC_LOADW_UMIN);
7555 OPCODE(ATOMIC_LOADW_UMAX);
7556 OPCODE(ATOMIC_CMP_SWAPW);
7557 OPCODE(ATOMIC_CMP_SWAP);
7558 OPCODE(ATOMIC_LOAD_128);
7559 OPCODE(ATOMIC_STORE_128);
7560 OPCODE(ATOMIC_CMP_SWAP_128);
7561 OPCODE(LRV);
7562 OPCODE(STRV);
7563 OPCODE(VLER);
7564 OPCODE(VSTER);
7565 OPCODE(STCKF);
7566 OPCODE(PREFETCH);
7567 OPCODE(ADA_ENTRY);
7568 }
7569 return nullptr;
7570#undef OPCODE
7571}
7572
7573// Return true if VT is a vector whose elements are a whole number of bytes
7574// in width. Also check for presence of vector support.
7575bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7576 if (!Subtarget.hasVector())
7577 return false;
7578
7579 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7580}
7581
7582// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7583// producing a result of type ResVT. Op is a possibly bitcast version
7584// of the input vector and Index is the index (based on type VecVT) that
7585// should be extracted. Return the new extraction if a simplification
7586// was possible or if Force is true.
7587SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7588 EVT VecVT, SDValue Op,
7589 unsigned Index,
7590 DAGCombinerInfo &DCI,
7591 bool Force) const {
7592 SelectionDAG &DAG = DCI.DAG;
7593
7594 // The number of bytes being extracted.
7595 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7596
7597 for (;;) {
7598 unsigned Opcode = Op.getOpcode();
7599 if (Opcode == ISD::BITCAST)
7600 // Look through bitcasts.
7601 Op = Op.getOperand(0);
7602 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7603 canTreatAsByteVector(Op.getValueType())) {
7604 // Get a VPERM-like permute mask and see whether the bytes covered
7605 // by the extracted element are a contiguous sequence from one
7606 // source operand.
7607 SmallVector<int, SystemZ::VectorBytes> Bytes;
7608 if (!getVPermMask(Op, Bytes))
7609 break;
7610 int First;
7611 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7612 BytesPerElement, First))
7613 break;
7614 if (First < 0)
7615 return DAG.getUNDEF(ResVT);
7616 // Make sure the contiguous sequence starts at a multiple of the
7617 // original element size.
7618 unsigned Byte = unsigned(First) % Bytes.size();
7619 if (Byte % BytesPerElement != 0)
7620 break;
7621 // We can get the extracted value directly from an input.
7622 Index = Byte / BytesPerElement;
7623 Op = Op.getOperand(unsigned(First) / Bytes.size());
7624 Force = true;
7625 } else if (Opcode == ISD::BUILD_VECTOR &&
7626 canTreatAsByteVector(Op.getValueType())) {
7627 // We can only optimize this case if the BUILD_VECTOR elements are
7628 // at least as wide as the extracted value.
7629 EVT OpVT = Op.getValueType();
7630 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7631 if (OpBytesPerElement < BytesPerElement)
7632 break;
7633 // Make sure that the least-significant bit of the extracted value
7634 // is the least significant bit of an input.
7635 unsigned End = (Index + 1) * BytesPerElement;
7636 if (End % OpBytesPerElement != 0)
7637 break;
7638 // We're extracting the low part of one operand of the BUILD_VECTOR.
7639 Op = Op.getOperand(End / OpBytesPerElement - 1);
7640 if (!Op.getValueType().isInteger()) {
7641 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7642 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7643 DCI.AddToWorklist(Op.getNode());
7644 }
7645 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7646 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7647 if (VT != ResVT) {
7648 DCI.AddToWorklist(Op.getNode());
7649 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7650 }
7651 return Op;
7652 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7653 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7654 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7655 canTreatAsByteVector(Op.getValueType()) &&
7656 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7657 // Make sure that only the unextended bits are significant.
7658 EVT ExtVT = Op.getValueType();
7659 EVT OpVT = Op.getOperand(0).getValueType();
7660 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7661 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7662 unsigned Byte = Index * BytesPerElement;
7663 unsigned SubByte = Byte % ExtBytesPerElement;
7664 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7665 if (SubByte < MinSubByte ||
7666 SubByte + BytesPerElement > ExtBytesPerElement)
7667 break;
7668 // Get the byte offset of the unextended element
7669 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7670 // ...then add the byte offset relative to that element.
7671 Byte += SubByte - MinSubByte;
7672 if (Byte % BytesPerElement != 0)
7673 break;
7674 Op = Op.getOperand(0);
7675 Index = Byte / BytesPerElement;
7676 Force = true;
7677 } else
7678 break;
7679 }
7680 if (Force) {
7681 if (Op.getValueType() != VecVT) {
7682 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7683 DCI.AddToWorklist(Op.getNode());
7684 }
7685 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7686 DAG.getConstant(Index, DL, MVT::i32));
7687 }
7688 return SDValue();
7689}
7690
7691// Optimize vector operations in scalar value Op on the basis that Op
7692// is truncated to TruncVT.
7693SDValue SystemZTargetLowering::combineTruncateExtract(
7694 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7695 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7696 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7697 // of type TruncVT.
7698 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7699 TruncVT.getSizeInBits() % 8 == 0) {
7700 SDValue Vec = Op.getOperand(0);
7701 EVT VecVT = Vec.getValueType();
7702 if (canTreatAsByteVector(VecVT)) {
7703 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7704 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7705 unsigned TruncBytes = TruncVT.getStoreSize();
7706 if (BytesPerElement % TruncBytes == 0) {
7707 // Calculate the value of Y' in the above description. We are
7708 // splitting the original elements into Scale equal-sized pieces
7709 // and for truncation purposes want the last (least-significant)
7710 // of these pieces for IndexN. This is easiest to do by calculating
7711 // the start index of the following element and then subtracting 1.
7712 unsigned Scale = BytesPerElement / TruncBytes;
7713 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7714
7715 // Defer the creation of the bitcast from X to combineExtract,
7716 // which might be able to optimize the extraction.
7717 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7718 MVT::getIntegerVT(TruncBytes * 8),
7719 VecVT.getStoreSize() / TruncBytes);
7720 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7721 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7722 }
7723 }
7724 }
7725 }
7726 return SDValue();
7727}
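// For example, truncating (extract_vector_elt v4i32 X, 1) to i16 gives
// Scale == 2 and NewIndex == (1 + 1) * 2 - 1 == 3, i.e. the element of
// (bitcast X to v8i16) that holds the low (least-significant) half of the
// original 32-bit element.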
7728
7729SDValue SystemZTargetLowering::combineZERO_EXTEND(
7730 SDNode *N, DAGCombinerInfo &DCI) const {
7731 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7732 SelectionDAG &DAG = DCI.DAG;
7733 SDValue N0 = N->getOperand(0);
7734 EVT VT = N->getValueType(0);
7735 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7736 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7737 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7738 if (TrueOp && FalseOp) {
7739 SDLoc DL(N0);
7740 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7741 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7742 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7743 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7744 // If N0 has multiple uses, change other uses as well.
7745 if (!N0.hasOneUse()) {
7746 SDValue TruncSelect =
7747 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7748 DCI.CombineTo(N0.getNode(), TruncSelect);
7749 }
7750 return NewSelect;
7751 }
7752 }
7753 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7754 // of the result is smaller than the size of X and all the truncated bits
7755 // of X are already zero.
7756 if (N0.getOpcode() == ISD::XOR &&
7757 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7758 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7759 N0.getOperand(1).getOpcode() == ISD::Constant) {
7760 SDValue X = N0.getOperand(0).getOperand(0);
7761 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7762 KnownBits Known = DAG.computeKnownBits(X);
7763 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7764 N0.getValueSizeInBits(),
7765 VT.getSizeInBits());
7766 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7767 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7768 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7769 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7770 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7771 }
7772 }
7773 }
7774 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7775 // and VECTOR ADD COMPUTE CARRY for i128:
7776 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7777 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7778 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7779 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7780 // For vector types, these patterns are recognized in the .td file.
7781 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7782 N0.getOperand(0).getValueType() == VT) {
7783 SDValue Op0 = N0.getOperand(0);
7784 SDValue Op1 = N0.getOperand(1);
7785 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7786 switch (CC) {
7787 case ISD::SETULE:
7788 std::swap(Op0, Op1);
7789 [[fallthrough]];
7790 case ISD::SETUGE:
7791 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7792 case ISD::SETUGT:
7793 std::swap(Op0, Op1);
7794 [[fallthrough]];
7795 case ISD::SETULT:
7796 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7797 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7798 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7799 Op0->getOperand(1));
7800 break;
7801 default:
7802 break;
7803 }
7804 }
7805
7806 return SDValue();
7807}
7808
7809SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7810 SDNode *N, DAGCombinerInfo &DCI) const {
7811 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7812 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7813 // into (select_cc LHS, RHS, -1, 0, COND)
7814 SelectionDAG &DAG = DCI.DAG;
7815 SDValue N0 = N->getOperand(0);
7816 EVT VT = N->getValueType(0);
7817 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7818 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7819 N0 = N0.getOperand(0);
7820 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7821 SDLoc DL(N0);
7822 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7823 DAG.getAllOnesConstant(DL, VT),
7824 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7825 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7826 }
7827 return SDValue();
7828}
7829
7830SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7831 SDNode *N, DAGCombinerInfo &DCI) const {
7832 // Convert (sext (ashr (shl X, C1), C2)) to
7833 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7834 // cheap as narrower ones.
7835 SelectionDAG &DAG = DCI.DAG;
7836 SDValue N0 = N->getOperand(0);
7837 EVT VT = N->getValueType(0);
7838 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7839 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7840 SDValue Inner = N0.getOperand(0);
7841 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7842 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7843 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7844 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7845 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7846 EVT ShiftVT = N0.getOperand(1).getValueType();
7847 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7848 Inner.getOperand(0));
7849 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7850 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7851 ShiftVT));
7852 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7853 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7854 }
7855 }
7856 }
7857
7858 return SDValue();
7859}
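// For example, (sext i32 -> i64 (ashr (shl X, 24), 24)) becomes
// (ashr (shl (anyext X), 56), 56): Extra == 32 here, and the wider
// 64-bit shifts cost the same as the original 32-bit ones.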
7860
7861SDValue SystemZTargetLowering::combineMERGE(
7862 SDNode *N, DAGCombinerInfo &DCI) const {
7863 SelectionDAG &DAG = DCI.DAG;
7864 unsigned Opcode = N->getOpcode();
7865 SDValue Op0 = N->getOperand(0);
7866 SDValue Op1 = N->getOperand(1);
7867 if (Op0.getOpcode() == ISD::BITCAST)
7868 Op0 = Op0.getOperand(0);
7869 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7870 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7871 // for v4f32.
7872 if (Op1 == N->getOperand(0))
7873 return Op1;
7874 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7875 EVT VT = Op1.getValueType();
7876 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7877 if (ElemBytes <= 4) {
7878 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7879 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7880 EVT InVT = VT.changeVectorElementTypeToInteger();
7881 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7882 SystemZ::VectorBytes / ElemBytes / 2);
7883 if (VT != InVT) {
7884 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7885 DCI.AddToWorklist(Op1.getNode());
7886 }
7887 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7888 DCI.AddToWorklist(Op.getNode());
7889 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7890 }
7891 }
7892 return SDValue();
7893}
7894
7895static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7896 SDNode *&HiPart) {
7897 LoPart = HiPart = nullptr;
7898
7899 // Scan through all users.
7900 for (SDUse &Use : LD->uses()) {
7901 // Skip the uses of the chain.
7902 if (Use.getResNo() != 0)
7903 continue;
7904
7905 // Verify every user is a TRUNCATE to i64 of the low or high half.
7906 SDNode *User = Use.getUser();
7907 bool IsLoPart = true;
7908 if (User->getOpcode() == ISD::SRL &&
7909 User->getOperand(1).getOpcode() == ISD::Constant &&
7910 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7911 User = *User->user_begin();
7912 IsLoPart = false;
7913 }
7914 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7915 return false;
7916
7917 if (IsLoPart) {
7918 if (LoPart)
7919 return false;
7920 LoPart = User;
7921 } else {
7922 if (HiPart)
7923 return false;
7924 HiPart = User;
7925 }
7926 }
7927 return true;
7928}
7929
7930static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7931 SDNode *&HiPart) {
7932 LoPart = HiPart = nullptr;
7933
7934 // Scan through all users.
7935 for (SDUse &Use : LD->uses()) {
7936 // Skip the uses of the chain.
7937 if (Use.getResNo() != 0)
7938 continue;
7939
7940 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7941 SDNode *User = Use.getUser();
7942 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7943 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7944 return false;
7945
7946 switch (User->getConstantOperandVal(1)) {
7947 case SystemZ::subreg_l64:
7948 if (LoPart)
7949 return false;
7950 LoPart = User;
7951 break;
7952 case SystemZ::subreg_h64:
7953 if (HiPart)
7954 return false;
7955 HiPart = User;
7956 break;
7957 default:
7958 return false;
7959 }
7960 }
7961 return true;
7962}
7963
7964SDValue SystemZTargetLowering::combineLOAD(
7965 SDNode *N, DAGCombinerInfo &DCI) const {
7966 SelectionDAG &DAG = DCI.DAG;
7967 EVT LdVT = N->getValueType(0);
7968 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7969 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7970 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7971 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7972 if (PtrVT != LoadNodeVT) {
7973 SDLoc DL(LN);
7974 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7975 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7976 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7977 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7978 LN->getMemOperand());
7979 }
7980 }
7981 }
7982 SDLoc DL(N);
7983
7984 // Replace a 128-bit load that is used solely to move its value into GPRs
7985 // by separate loads of both halves.
7986 LoadSDNode *LD = cast<LoadSDNode>(N);
7987 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7988 SDNode *LoPart, *HiPart;
7989 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7990 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7991 // Rewrite each extraction as an independent load.
7992 SmallVector<SDValue, 2> ArgChains;
7993 if (HiPart) {
7994 SDValue EltLoad = DAG.getLoad(
7995 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7996 LD->getPointerInfo(), LD->getBaseAlign(),
7997 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7998
7999 DCI.CombineTo(HiPart, EltLoad, true);
8000 ArgChains.push_back(EltLoad.getValue(1));
8001 }
8002 if (LoPart) {
8003 SDValue EltLoad = DAG.getLoad(
8004 LoPart->getValueType(0), DL, LD->getChain(),
8005 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
8006 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
8007 LD->getMemOperand()->getFlags(), LD->getAAInfo());
8008
8009 DCI.CombineTo(LoPart, EltLoad, true);
8010 ArgChains.push_back(EltLoad.getValue(1));
8011 }
8012
8013 // Collect all chains via TokenFactor.
8014 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
8015 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
8016 DCI.AddToWorklist(Chain.getNode());
8017 return SDValue(N, 0);
8018 }
8019 }
8020
8021 if (LdVT.isVector() || LdVT.isInteger())
8022 return SDValue();
8023 // Transform a scalar load that is REPLICATEd as well as having other
8024 // use(s) to the form where the other use(s) use the first element of the
8025 // REPLICATE instead of the load. Otherwise instruction selection will not
8026 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
8027 // point loads.
8028
8029 SDValue Replicate;
8030 SmallVector<SDNode*, 8> OtherUses;
8031 for (SDUse &Use : N->uses()) {
8032 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
8033 if (Replicate)
8034 return SDValue(); // Should never happen
8035 Replicate = SDValue(Use.getUser(), 0);
8036 } else if (Use.getResNo() == 0)
8037 OtherUses.push_back(Use.getUser());
8038 }
8039 if (!Replicate || OtherUses.empty())
8040 return SDValue();
8041
8042 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
8043 Replicate, DAG.getConstant(0, DL, MVT::i32));
8044 // Update uses of the loaded Value while preserving old chains.
8045 for (SDNode *U : OtherUses) {
8046 SmallVector<SDValue, 8> Ops;
8047 for (SDValue Op : U->ops())
8048 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
8049 DAG.UpdateNodeOperands(U, Ops);
8050 }
8051 return SDValue(N, 0);
8052}
8053
8054bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
8055 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
8056 return true;
8057 if (Subtarget.hasVectorEnhancements2())
8058 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
8059 return true;
8060 return false;
8061}
8062
8063 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
8064 if (!VT.isVector() || !VT.isSimple() ||
8065 VT.getSizeInBits() != 128 ||
8066 VT.getScalarSizeInBits() % 8 != 0)
8067 return false;
8068
8069 unsigned NumElts = VT.getVectorNumElements();
8070 for (unsigned i = 0; i < NumElts; ++i) {
8071 if (M[i] < 0) continue; // ignore UNDEF indices
8072 if ((unsigned) M[i] != NumElts - 1 - i)
8073 return false;
8074 }
8075
8076 return true;
8077}
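// For example, for a v4i32 shuffle the element-swap masks accepted above
// are exactly those equivalent to {3, 2, 1, 0} (with undef entries
// allowed), i.e. a full reversal of the four elements.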
8078
8079static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8080 for (auto *U : StoredVal->users()) {
8081 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
8082 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8083 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8084 continue;
8085 } else if (isa<BuildVectorSDNode>(U)) {
8086 SDValue BuildVector = SDValue(U, 0);
8087 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
8088 isOnlyUsedByStores(BuildVector, DAG))
8089 continue;
8090 }
8091 return false;
8092 }
8093 return true;
8094}
8095
8096static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8097 SDValue &HiPart) {
8098 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8099 return false;
8100
8101 SDValue Op0 = Val.getOperand(0);
8102 SDValue Op1 = Val.getOperand(1);
8103
8104 if (Op0.getOpcode() == ISD::SHL)
8105 std::swap(Op0, Op1);
8106 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8107 Op1.getOperand(1).getOpcode() != ISD::Constant ||
8108 Op1.getConstantOperandVal(1) != 64)
8109 return false;
8110 Op1 = Op1.getOperand(0);
8111
8112 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8113 Op0.getOperand(0).getValueType() != MVT::i64)
8114 return false;
8115 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8116 Op1.getOperand(0).getValueType() != MVT::i64)
8117 return false;
8118
8119 LoPart = Op0.getOperand(0);
8120 HiPart = Op1.getOperand(0);
8121 return true;
8122}
8123
8124static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8125 SDValue &HiPart) {
8126 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8127 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8128 return false;
8129
8130 if (Val->getNumOperands() != 5 ||
8131 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8132 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8133 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8134 return false;
8135
8136 LoPart = Val->getOperand(1);
8137 HiPart = Val->getOperand(3);
8138 return true;
8139}
8140
8141SDValue SystemZTargetLowering::combineSTORE(
8142 SDNode *N, DAGCombinerInfo &DCI) const {
8143 SelectionDAG &DAG = DCI.DAG;
8144 auto *SN = cast<StoreSDNode>(N);
8145 auto &Op1 = N->getOperand(1);
8146 EVT MemVT = SN->getMemoryVT();
8147
8148 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8149 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8150 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8151 if (PtrVT != StoreNodeVT) {
8152 SDLoc DL(SN);
8153 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8154 SYSTEMZAS::PTR32, 0);
8155 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8156 SN->getPointerInfo(), SN->getBaseAlign(),
8157 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8158 }
8159 }
8160
8161 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8162 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8163 // If X has wider elements then convert it to:
8164 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8165 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8166 if (SDValue Value =
8167 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8168 DCI.AddToWorklist(Value.getNode());
8169
8170 // Rewrite the store with the new form of stored value.
8171 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8172 SN->getBasePtr(), SN->getMemoryVT(),
8173 SN->getMemOperand());
8174 }
8175 }
8176 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8177 if (!SN->isTruncatingStore() &&
8178 Op1.getOpcode() == ISD::BSWAP &&
8179 Op1.getNode()->hasOneUse() &&
8180 canLoadStoreByteSwapped(Op1.getValueType())) {
8181
8182 SDValue BSwapOp = Op1.getOperand(0);
8183
8184 if (BSwapOp.getValueType() == MVT::i16)
8185 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8186
8187 SDValue Ops[] = {
8188 N->getOperand(0), BSwapOp, N->getOperand(2)
8189 };
8190
8191 return
8192 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8193 Ops, MemVT, SN->getMemOperand());
8194 }
8195 // Combine STORE (element-swap) into VSTER
8196 if (!SN->isTruncatingStore() &&
8197 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8198 Op1.getNode()->hasOneUse() &&
8199 Subtarget.hasVectorEnhancements2()) {
8200 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8201 ArrayRef<int> ShuffleMask = SVN->getMask();
8202 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8203 SDValue Ops[] = {
8204 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8205 };
8206
8207 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8208 DAG.getVTList(MVT::Other),
8209 Ops, MemVT, SN->getMemOperand());
8210 }
8211 }
8212
8213 // Combine STORE (READCYCLECOUNTER) into STCKF.
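// For example, (store (readcyclecounter), Ptr) becomes a single STCKF, which
// writes the TOD clock value straight to memory.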
8214 if (!SN->isTruncatingStore() &&
8215 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8216 Op1.hasOneUse() &&
8217 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8218 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8219 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8220 DAG.getVTList(MVT::Other),
8221 Ops, MemVT, SN->getMemOperand());
8222 }
8223
8224 // Transform a store of a 128-bit value moved from parts into two stores.
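// For example, an i128 value assembled from two i64 halves is stored as the
// high part at offset 0 and the low part at offset 8 (big-endian layout),
// instead of first materializing the full 128-bit value.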
8225 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8226 SDValue LoPart, HiPart;
8227 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8228 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8229 SDLoc DL(SN);
8230 SDValue Chain0 = DAG.getStore(
8231 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8232 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8233 SDValue Chain1 = DAG.getStore(
8234 SN->getChain(), DL, LoPart,
8235 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8236 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8237 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8238
8239 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8240 }
8241 }
8242
8243 // Replicate a reg or immediate with VREP instead of scalar multiply or
8244 // immediate load. It seems best to do this during the first DAGCombine as
8245 // it is straightforward to handle the zero-extend node in the initial
8246 // DAG, and also not to worry about keeping the new MemVT legal (e.g. when
8247 // extracting an i16 element from a v16i8 vector).
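// For example, storing the i64 value (zext i16 X) * 0x0001000100010001 can be
// done by replicating X into a v4i16 with VREP and storing that vector.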
8248 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8249 isOnlyUsedByStores(Op1, DAG)) {
8250 SDValue Word = SDValue();
8251 EVT WordVT;
8252
8253 // Find a replicated immediate; if one is found, return it in Word and its
8254 // type in WordVT.
8255 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8256 // Some constants are better handled with a scalar store.
8257 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8258 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8259 return;
8260
8261 APInt Val = C->getAPIntValue();
8262 // Truncate Val in case of a truncating store.
8263 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8264 assert(SN->isTruncatingStore() &&
8265 "Non-truncating store and immediate value does not fit?");
8266 Val = Val.trunc(TotBytes * 8);
8267 }
8268
8269 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8270 if (VCI.isVectorConstantLegal(Subtarget) &&
8271 VCI.Opcode == SystemZISD::REPLICATE) {
8272 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8273 WordVT = VCI.VecVT.getScalarType();
8274 }
8275 };
8276
8277 // Find a replicated register; if one is found, return it in Word and its
8278 // type in WordVT.
8279 auto FindReplicatedReg = [&](SDValue MulOp) {
8280 EVT MulVT = MulOp.getValueType();
8281 if (MulOp->getOpcode() == ISD::MUL &&
8282 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8283 // Find a zero extended value and its type.
8284 SDValue LHS = MulOp->getOperand(0);
8285 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8286 WordVT = LHS->getOperand(0).getValueType();
8287 else if (LHS->getOpcode() == ISD::AssertZext)
8288 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8289 else
8290 return;
8291 // Find a replicating constant, e.g. 0x00010001.
8292 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8293 SystemZVectorConstantInfo VCI(
8294 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8295 if (VCI.isVectorConstantLegal(Subtarget) &&
8296 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8297 WordVT == VCI.VecVT.getScalarType())
8298 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8299 }
8300 }
8301 };
8302
8303 if (isa<BuildVectorSDNode>(Op1) &&
8304 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8305 SDValue SplatVal = Op1->getOperand(0);
8306 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8307 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8308 else
8309 FindReplicatedReg(SplatVal);
8310 } else {
8311 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8312 FindReplicatedImm(C, MemVT.getStoreSize());
8313 else
8314 FindReplicatedReg(Op1);
8315 }
8316
8317 if (Word != SDValue()) {
8318 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8319 "Bad type handling");
8320 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8321 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8322 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8323 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8324 SN->getBasePtr(), SN->getMemOperand());
8325 }
8326 }
8327
8328 return SDValue();
8329}
8330
8331SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8332 SDNode *N, DAGCombinerInfo &DCI) const {
8333 SelectionDAG &DAG = DCI.DAG;
8334 // Combine element-swap (LOAD) into VLER
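// For example, an element-reversing shuffle of a v4i32 load becomes a single
// VLER, which loads the elements already reversed.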
8335 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8336 N->getOperand(0).hasOneUse() &&
8337 Subtarget.hasVectorEnhancements2()) {
8338 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8339 ArrayRef<int> ShuffleMask = SVN->getMask();
8340 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8341 SDValue Load = N->getOperand(0);
8342 LoadSDNode *LD = cast<LoadSDNode>(Load);
8343
8344 // Create the element-swapping load.
8345 SDValue Ops[] = {
8346 LD->getChain(), // Chain
8347 LD->getBasePtr() // Ptr
8348 };
8349 SDValue ESLoad =
8350 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8351 DAG.getVTList(LD->getValueType(0), MVT::Other),
8352 Ops, LD->getMemoryVT(), LD->getMemOperand());
8353
8354 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8355 // by the load dead.
8356 DCI.CombineTo(N, ESLoad);
8357
8358 // Next, combine the load away; we give it a bogus result value but a real
8359 // chain result. The result value is dead because the shuffle is dead.
8360 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8361
8362 // Return N so it doesn't get rechecked!
8363 return SDValue(N, 0);
8364 }
8365 }
8366
8367 return SDValue();
8368}
8369
8370SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8371 SDNode *N, DAGCombinerInfo &DCI) const {
8372 SelectionDAG &DAG = DCI.DAG;
8373
8374 if (!Subtarget.hasVector())
8375 return SDValue();
8376
8377 // Look through bitcasts that retain the number of vector elements.
8378 SDValue Op = N->getOperand(0);
8379 if (Op.getOpcode() == ISD::BITCAST &&
8380 Op.getValueType().isVector() &&
8381 Op.getOperand(0).getValueType().isVector() &&
8382 Op.getValueType().getVectorNumElements() ==
8383 Op.getOperand(0).getValueType().getVectorNumElements())
8384 Op = Op.getOperand(0);
8385
8386 // Pull BSWAP out of a vector extraction.
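// That is, (extract_vector_elt (bswap X), Idx) becomes
// (bswap (extract_vector_elt X, Idx)), so only the extracted lane needs to be
// byte-swapped.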
8387 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8388 EVT VecVT = Op.getValueType();
8389 EVT EltVT = VecVT.getVectorElementType();
8390 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8391 Op.getOperand(0), N->getOperand(1));
8392 DCI.AddToWorklist(Op.getNode());
8393 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8394 if (EltVT != N->getValueType(0)) {
8395 DCI.AddToWorklist(Op.getNode());
8396 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8397 }
8398 return Op;
8399 }
8400
8401 // Try to simplify a vector extraction.
8402 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8403 SDValue Op0 = N->getOperand(0);
8404 EVT VecVT = Op0.getValueType();
8405 if (canTreatAsByteVector(VecVT))
8406 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8407 IndexN->getZExtValue(), DCI, false);
8408 }
8409 return SDValue();
8410}
8411
8412SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8413 SDNode *N, DAGCombinerInfo &DCI) const {
8414 SelectionDAG &DAG = DCI.DAG;
8415 // (join_dwords X, X) == (replicate X)
8416 if (N->getOperand(0) == N->getOperand(1))
8417 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8418 N->getOperand(0));
8419 return SDValue();
8420}
8421
8423 SDValue Chain1 = N1->getOperand(0);
8424 SDValue Chain2 = N2->getOperand(0);
8425
8426 // Trivial case: both nodes take the same chain.
8427 if (Chain1 == Chain2)
8428 return Chain1;
8429
8430 // FIXME - we could handle more complex cases via TokenFactor,
8431 // assuming we can verify that this would not create a cycle.
8432 return SDValue();
8433}
8434
8435SDValue SystemZTargetLowering::combineFP_ROUND(
8436 SDNode *N, DAGCombinerInfo &DCI) const {
8437
8438 if (!Subtarget.hasVector())
8439 return SDValue();
8440
8441 // (fpround (extract_vector_elt X 0))
8442 // (fpround (extract_vector_elt X 1)) ->
8443 // (extract_vector_elt (VROUND X) 0)
8444 // (extract_vector_elt (VROUND X) 2)
8445 //
8446 // This is a special case since the target doesn't really support v2f32s.
8447 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8448 SelectionDAG &DAG = DCI.DAG;
8449 SDValue Op0 = N->getOperand(OpNo);
8450 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8451 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8452 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8453 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8454 Op0.getConstantOperandVal(1) == 0) {
8455 SDValue Vec = Op0.getOperand(0);
8456 for (auto *U : Vec->users()) {
8457 if (U != Op0.getNode() && U->hasOneUse() &&
8458 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8459 U->getOperand(0) == Vec &&
8460 U->getOperand(1).getOpcode() == ISD::Constant &&
8461 U->getConstantOperandVal(1) == 1) {
8462 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8463 if (OtherRound.getOpcode() == N->getOpcode() &&
8464 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8465 OtherRound.getValueType() == MVT::f32) {
8466 SDValue VRound, Chain;
8467 if (N->isStrictFPOpcode()) {
8468 Chain = MergeInputChains(N, OtherRound.getNode());
8469 if (!Chain)
8470 continue;
8471 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8472 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8473 Chain = VRound.getValue(1);
8474 } else
8475 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8476 MVT::v4f32, Vec);
8477 DCI.AddToWorklist(VRound.getNode());
8478 SDValue Extract1 =
8479 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8480 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8481 DCI.AddToWorklist(Extract1.getNode());
8482 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8483 if (Chain)
8484 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8485 SDValue Extract0 =
8486 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8487 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8488 if (Chain)
8489 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8490 N->getVTList(), Extract0, Chain);
8491 return Extract0;
8492 }
8493 }
8494 }
8495 }
8496 return SDValue();
8497}
8498
8499SDValue SystemZTargetLowering::combineFP_EXTEND(
8500 SDNode *N, DAGCombinerInfo &DCI) const {
8501
8502 if (!Subtarget.hasVector())
8503 return SDValue();
8504
8505 // (fpextend (extract_vector_elt X 0))
8506 // (fpextend (extract_vector_elt X 2)) ->
8507 // (extract_vector_elt (VEXTEND X) 0)
8508 // (extract_vector_elt (VEXTEND X) 1)
8509 //
8510 // This is a special case since the target doesn't really support v2f32s.
8511 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8512 SelectionDAG &DAG = DCI.DAG;
8513 SDValue Op0 = N->getOperand(OpNo);
8514 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8515 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8516 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8517 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8518 Op0.getConstantOperandVal(1) == 0) {
8519 SDValue Vec = Op0.getOperand(0);
8520 for (auto *U : Vec->users()) {
8521 if (U != Op0.getNode() && U->hasOneUse() &&
8522 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8523 U->getOperand(0) == Vec &&
8524 U->getOperand(1).getOpcode() == ISD::Constant &&
8525 U->getConstantOperandVal(1) == 2) {
8526 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8527 if (OtherExtend.getOpcode() == N->getOpcode() &&
8528 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8529 OtherExtend.getValueType() == MVT::f64) {
8530 SDValue VExtend, Chain;
8531 if (N->isStrictFPOpcode()) {
8532 Chain = MergeInputChains(N, OtherExtend.getNode());
8533 if (!Chain)
8534 continue;
8535 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8536 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8537 Chain = VExtend.getValue(1);
8538 } else
8539 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8540 MVT::v2f64, Vec);
8541 DCI.AddToWorklist(VExtend.getNode());
8542 SDValue Extract1 =
8543 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8544 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8545 DCI.AddToWorklist(Extract1.getNode());
8546 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8547 if (Chain)
8548 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8549 SDValue Extract0 =
8550 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8551 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8552 if (Chain)
8553 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8554 N->getVTList(), Extract0, Chain);
8555 return Extract0;
8556 }
8557 }
8558 }
8559 }
8560 return SDValue();
8561}
8562
8563SDValue SystemZTargetLowering::combineINT_TO_FP(
8564 SDNode *N, DAGCombinerInfo &DCI) const {
8565 if (DCI.Level != BeforeLegalizeTypes)
8566 return SDValue();
8567 SelectionDAG &DAG = DCI.DAG;
8568 LLVMContext &Ctx = *DAG.getContext();
8569 unsigned Opcode = N->getOpcode();
8570 EVT OutVT = N->getValueType(0);
8571 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8572 SDValue Op = N->getOperand(0);
8573 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8574 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8575
8576 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8577 // v2f64 = uint_to_fp v2i16
8578 // =>
8579 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8580 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8581 OutScalarBits <= 64) {
8582 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8583 EVT ExtVT = EVT::getVectorVT(
8584 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8585 unsigned ExtOpcode =
8586 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
8587 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8588 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8589 }
8590 return SDValue();
8591}
8592
8593SDValue SystemZTargetLowering::combineFCOPYSIGN(
8594 SDNode *N, DAGCombinerInfo &DCI) const {
8595 SelectionDAG &DAG = DCI.DAG;
8596 EVT VT = N->getValueType(0);
8597 SDValue ValOp = N->getOperand(0);
8598 SDValue SignOp = N->getOperand(1);
8599
8600 // Remove the rounding which is not needed.
8601 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8602 SDValue WideOp = SignOp.getOperand(0);
8603 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8604 }
8605
8606 return SDValue();
8607}
8608
8609SDValue SystemZTargetLowering::combineBSWAP(
8610 SDNode *N, DAGCombinerInfo &DCI) const {
8611 SelectionDAG &DAG = DCI.DAG;
8612 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8613 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8614 N->getOperand(0).hasOneUse() &&
8615 canLoadStoreByteSwapped(N->getValueType(0))) {
8616 SDValue Load = N->getOperand(0);
8617 LoadSDNode *LD = cast<LoadSDNode>(Load);
8618
8619 // Create the byte-swapping load.
8620 SDValue Ops[] = {
8621 LD->getChain(), // Chain
8622 LD->getBasePtr() // Ptr
8623 };
8624 EVT LoadVT = N->getValueType(0);
8625 if (LoadVT == MVT::i16)
8626 LoadVT = MVT::i32;
8627 SDValue BSLoad =
8628 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8629 DAG.getVTList(LoadVT, MVT::Other),
8630 Ops, LD->getMemoryVT(), LD->getMemOperand());
8631
8632 // If this is an i16 load, insert the truncate.
8633 SDValue ResVal = BSLoad;
8634 if (N->getValueType(0) == MVT::i16)
8635 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8636
8637 // First, combine the bswap away. This makes the value produced by the
8638 // load dead.
8639 DCI.CombineTo(N, ResVal);
8640
8641 // Next, combine the load away; we give it a bogus result value but a real
8642 // chain result. The result value is dead because the bswap is dead.
8643 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8644
8645 // Return N so it doesn't get rechecked!
8646 return SDValue(N, 0);
8647 }
8648
8649 // Look through bitcasts that retain the number of vector elements.
8650 SDValue Op = N->getOperand(0);
8651 if (Op.getOpcode() == ISD::BITCAST &&
8652 Op.getValueType().isVector() &&
8653 Op.getOperand(0).getValueType().isVector() &&
8654 Op.getValueType().getVectorNumElements() ==
8655 Op.getOperand(0).getValueType().getVectorNumElements())
8656 Op = Op.getOperand(0);
8657
8658 // Push BSWAP into a vector insertion if at least one side then simplifies.
8659 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8660 SDValue Vec = Op.getOperand(0);
8661 SDValue Elt = Op.getOperand(1);
8662 SDValue Idx = Op.getOperand(2);
8663
8664 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
8665 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8666 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
8667 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8668 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8669 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8670 EVT VecVT = N->getValueType(0);
8671 EVT EltVT = N->getValueType(0).getVectorElementType();
8672 if (VecVT != Vec.getValueType()) {
8673 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8674 DCI.AddToWorklist(Vec.getNode());
8675 }
8676 if (EltVT != Elt.getValueType()) {
8677 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8678 DCI.AddToWorklist(Elt.getNode());
8679 }
8680 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8681 DCI.AddToWorklist(Vec.getNode());
8682 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8683 DCI.AddToWorklist(Elt.getNode());
8684 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8685 Vec, Elt, Idx);
8686 }
8687 }
8688
8689 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8690 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8691 if (SV && Op.hasOneUse()) {
8692 SDValue Op0 = Op.getOperand(0);
8693 SDValue Op1 = Op.getOperand(1);
8694
8695 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8696 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8697 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8698 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8699 EVT VecVT = N->getValueType(0);
8700 if (VecVT != Op0.getValueType()) {
8701 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8702 DCI.AddToWorklist(Op0.getNode());
8703 }
8704 if (VecVT != Op1.getValueType()) {
8705 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8706 DCI.AddToWorklist(Op1.getNode());
8707 }
8708 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8709 DCI.AddToWorklist(Op0.getNode());
8710 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8711 DCI.AddToWorklist(Op1.getNode());
8712 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8713 }
8714 }
8715
8716 return SDValue();
8717}
8718
8719SDValue SystemZTargetLowering::combineSETCC(
8720 SDNode *N, DAGCombinerInfo &DCI) const {
8721 SelectionDAG &DAG = DCI.DAG;
8722 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8723 const SDValue LHS = N->getOperand(0);
8724 const SDValue RHS = N->getOperand(1);
8725 bool CmpNull = isNullConstant(RHS);
8726 bool CmpAllOnes = isAllOnesConstant(RHS);
8727 EVT VT = N->getValueType(0);
8728 SDLoc DL(N);
8729
8730 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8731 // change the outer compare to an i128 compare. This will normally
8732 // allow the reduction to be recognized in adjustICmp128, and even if
8733 // not, the i128 compare will still generate better code.
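// For example, (setcc (bitcast (setcc v4i32 X, Y) to i4), 0, ne) becomes an
// i128 compare of the sign-extended v4i32 mask against zero.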
8734 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8735 SDValue Src = peekThroughBitcasts(LHS);
8736 if (Src.getOpcode() == ISD::SETCC &&
8737 Src.getValueType().isFixedLengthVector() &&
8738 Src.getValueType().getScalarType() == MVT::i1) {
8739 EVT CmpVT = Src.getOperand(0).getValueType();
8740 if (CmpVT.getSizeInBits() == 128) {
8741 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8742 SDValue LHS =
8743 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8744 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8745 : DAG.getAllOnesConstant(DL, MVT::i128);
8746 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8747 N->getFlags());
8748 }
8749 }
8750 }
8751
8752 return SDValue();
8753}
8754
8755static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8756 switch (Val.getOpcode()) {
8757 default:
8758 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8759 case SystemZISD::IPM:
8760 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8762 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8763 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8764 case SystemZISD::SELECT_CCMASK: {
8765 SDValue Op4CCReg = Val.getOperand(4);
8766 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8767 Op4CCReg.getOpcode() == SystemZISD::TM) {
8768 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
8769 if (OpCC != SDValue())
8770 return std::make_pair(OpCC, OpCCValid);
8771 }
8772 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8773 if (!CCValid)
8774 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8775 int CCValidVal = CCValid->getZExtValue();
8776 return std::make_pair(Op4CCReg, CCValidVal);
8777 }
8778 case ISD::ADD:
8779 case ISD::AND:
8780 case ISD::OR:
8781 case ISD::XOR:
8782 case ISD::SHL:
8783 case ISD::SRA:
8784 case ISD::SRL:
8785 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
8786 if (Op0CC != SDValue())
8787 return std::make_pair(Op0CC, Op0CCValid);
8788 return findCCUse(Val.getOperand(1));
8789 }
8790}
8791
8792static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8793 SelectionDAG &DAG);
8794
8795 static SmallVector<SDValue, 4> simplifyAssumingCCVal(SDValue Val, SDValue CC,
8796 SelectionDAG &DAG) {
8797 SDLoc DL(Val);
8798 auto Opcode = Val.getOpcode();
8799 switch (Opcode) {
8800 default:
8801 return {};
8802 case ISD::Constant:
8803 return {Val, Val, Val, Val};
8804 case SystemZISD::IPM: {
8805 SDValue IPMOp0 = Val.getOperand(0);
8806 if (IPMOp0 != CC)
8807 return {};
8808 SmallVector<SDValue, 4> ShiftedCCVals;
8809 for (auto CC : {0, 1, 2, 3})
8810 ShiftedCCVals.emplace_back(
8811 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8812 return ShiftedCCVals;
8813 }
8814 case SystemZISD::SELECT_CCMASK: {
8815 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8816 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8817 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8818 if (!CCValid || !CCMask)
8819 return {};
8820
8821 int CCValidVal = CCValid->getZExtValue();
8822 int CCMaskVal = CCMask->getZExtValue();
8823 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8824 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8825 if (TrueSDVals.empty() || FalseSDVals.empty())
8826 return {};
8827 SDValue Op4CCReg = Val.getOperand(4);
8828 if (Op4CCReg != CC)
8829 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8830 if (Op4CCReg != CC)
8831 return {};
8832 SmallVector<SDValue, 4> MergedSDVals;
8833 for (auto &CCVal : {0, 1, 2, 3})
8834 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8835 ? TrueSDVals[CCVal]
8836 : FalseSDVals[CCVal]);
8837 return MergedSDVals;
8838 }
8839 case ISD::ADD:
8840 case ISD::AND:
8841 case ISD::OR:
8842 case ISD::XOR:
8843 case ISD::SRA:
8844 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8845 // would clobber CC).
8846 if (!Val.hasOneUse())
8847 return {};
8848 [[fallthrough]];
8849 case ISD::SHL:
8850 case ISD::SRL:
8851 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8852 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8853 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8854 if (Op0SDVals.empty() || Op1SDVals.empty())
8855 return {};
8856 SmallVector<SDValue, 4> BinaryOpSDVals;
8857 for (auto CCVal : {0, 1, 2, 3})
8858 BinaryOpSDVals.emplace_back(DAG.getNode(
8859 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8860 return BinaryOpSDVals;
8861 }
8862}
8863
8864static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8865 SelectionDAG &DAG) {
8866 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8867 // set by the CCReg instruction using the CCValid / CCMask masks.
8868 // If the CCReg instruction is itself an ICMP / TM testing the condition
8869 // code set by some other instruction, see whether we can directly
8870 // use that condition code.
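// For example, (br_ccmask (icmp (select_ccmask 1, 0, Valid, Mask), 0, ne))
// can branch directly on the CC value that fed the select, using a suitably
// remapped CC mask.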
8871 auto *CCNode = CCReg.getNode();
8872 if (!CCNode)
8873 return false;
8874
8875 if (CCNode->getOpcode() == SystemZISD::TM) {
8876 if (CCValid != SystemZ::CCMASK_TM)
8877 return false;
8878 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8879 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8880 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8881 if (!Op0Node || !Op1Node)
8882 return -1;
8883 auto Op0APVal = Op0Node->getAPIntValue();
8884 auto Op1APVal = Op1Node->getAPIntValue();
8885 auto Result = Op0APVal & Op1APVal;
8886 bool AllOnes = Result == Op1APVal;
8887 bool AllZeros = Result == 0;
8888 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8889 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8890 };
8891 SDValue Op0 = CCNode->getOperand(0);
8892 SDValue Op1 = CCNode->getOperand(1);
8893 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8894 if (Op0CC == SDValue())
8895 return false;
8896 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8897 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8898 if (Op0SDVals.empty() || Op1SDVals.empty())
8899 return false;
8900 int NewCCMask = 0;
8901 for (auto CC : {0, 1, 2, 3}) {
8902 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8903 if (CCVal < 0)
8904 return false;
8905 NewCCMask <<= 1;
8906 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8907 }
8908 NewCCMask &= Op0CCValid;
8909 CCReg = Op0CC;
8910 CCMask = NewCCMask;
8911 CCValid = Op0CCValid;
8912 return true;
8913 }
8914 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8915 CCValid != SystemZ::CCMASK_ICMP)
8916 return false;
8917
8918 SDValue CmpOp0 = CCNode->getOperand(0);
8919 SDValue CmpOp1 = CCNode->getOperand(1);
8920 SDValue CmpOp2 = CCNode->getOperand(2);
8921 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8922 if (Op0CC != SDValue()) {
8923 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8924 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8925 if (Op0SDVals.empty() || Op1SDVals.empty())
8926 return false;
8927
8928 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8929 auto CmpTypeVal = CmpType->getZExtValue();
8930 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8931 const SDValue &Op1Val) {
8932 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8933 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8934 if (!Op0Node || !Op1Node)
8935 return -1;
8936 auto Op0APVal = Op0Node->getAPIntValue();
8937 auto Op1APVal = Op1Node->getAPIntValue();
8938 if (CmpTypeVal == SystemZICMP::SignedOnly)
8939 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8940 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8941 };
8942 int NewCCMask = 0;
8943 for (auto CC : {0, 1, 2, 3}) {
8944 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8945 if (CCVal < 0)
8946 return false;
8947 NewCCMask <<= 1;
8948 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8949 }
8950 NewCCMask &= Op0CCValid;
8951 CCMask = NewCCMask;
8952 CCReg = Op0CC;
8953 CCValid = Op0CCValid;
8954 return true;
8955 }
8956
8957 return false;
8958}
8959
8960 // Cost heuristics for merging conditionals versus splitting into multiple branches.
8961 TargetLoweringBase::CondMergingParams
8962 SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
8963 const Value *Lhs,
8964 const Value *Rhs) const {
8965 const auto isFlagOutOpCC = [](const Value *V) {
8966 using namespace llvm::PatternMatch;
8967 const Value *RHSVal;
8968 const APInt *RHSC;
8969 if (const auto *I = dyn_cast<Instruction>(V)) {
8970 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8971 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8972 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8973 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8974 if (CB->isInlineAsm()) {
8975 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8976 return IA && IA->getConstraintString().contains("{@cc}");
8977 }
8978 }
8979 }
8980 }
8981 return false;
8982 };
8983 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8984 // The cost of the longest dependency chain (ICmp, And) is 2. CostThreshold
8985 // or BaseCost can be set >= 2. If the cost of an instruction is <=
8986 // CostThreshold, the conditionals are merged; otherwise they are split.
8987 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8988 return {3, 0, -1};
8989 // Default.
8990 return {-1, -1, -1};
8991}
8992
8993SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8994 DAGCombinerInfo &DCI) const {
8995 SelectionDAG &DAG = DCI.DAG;
8996
8997 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8998 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8999 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9000 if (!CCValid || !CCMask)
9001 return SDValue();
9002
9003 int CCValidVal = CCValid->getZExtValue();
9004 int CCMaskVal = CCMask->getZExtValue();
9005 SDValue Chain = N->getOperand(0);
9006 SDValue CCReg = N->getOperand(4);
9007 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
9008 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
9009 Chain,
9010 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9011 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
9012 N->getOperand(3), CCReg);
9013 return SDValue();
9014}
9015
9016SDValue SystemZTargetLowering::combineSELECT_CCMASK(
9017 SDNode *N, DAGCombinerInfo &DCI) const {
9018 SelectionDAG &DAG = DCI.DAG;
9019
9020 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
9021 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
9022 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
9023 if (!CCValid || !CCMask)
9024 return SDValue();
9025
9026 int CCValidVal = CCValid->getZExtValue();
9027 int CCMaskVal = CCMask->getZExtValue();
9028 SDValue CCReg = N->getOperand(4);
9029
9030 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
9031
9032 // Populate an SDVals vector with the value Val takes for each condition code,
9033 // where Val may itself be another nested select_ccmask on the same CC.
9034 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
9035 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
9036 SmallVector<SDValue, 4> Res;
9037 if (Val.getOperand(4) != CCReg)
9038 return SmallVector<SDValue, 4>{};
9039 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
9040 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
9041 if (!CCMask)
9042 return SmallVector<SDValue, 4>{};
9043
9044 int CCMaskVal = CCMask->getZExtValue();
9045 for (auto &CC : {0, 1, 2, 3})
9046 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
9047 : FalseVal);
9048 return Res;
9049 }
9050 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
9051 };
9052 // Attempt to optimize TrueVal/FalseVal of the outermost select_ccmask, using
9053 // either the CCReg found by combineCCMask or the original CCReg.
9054 SDValue TrueVal = N->getOperand(0);
9055 SDValue FalseVal = N->getOperand(1);
9056 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
9057 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
9058 // TrueSDVals/FalseSDVals might be empty if TrueVal/FalseVal of the
9059 // select_ccmask are non-constant and cannot be optimized further.
9060 if (TrueSDVals.empty())
9061 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
9062 if (FalseSDVals.empty())
9063 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
9064 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
9065 SmallSet<SDValue, 4> MergedSDValsSet;
9066 // Ignore CC values outside CCValid.
9067 for (auto CC : {0, 1, 2, 3}) {
9068 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
9069 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
9070 ? TrueSDVals[CC]
9071 : FalseSDVals[CC]);
9072 }
9073 if (MergedSDValsSet.size() == 1)
9074 return *MergedSDValsSet.begin();
9075 if (MergedSDValsSet.size() == 2) {
9076 auto BeginIt = MergedSDValsSet.begin();
9077 SDValue NewTrueVal = *BeginIt, NewFalseVal = *std::next(BeginIt);
9078 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
9079 std::swap(NewTrueVal, NewFalseVal);
9080 int NewCCMask = 0;
9081 for (auto CC : {0, 1, 2, 3}) {
9082 NewCCMask <<= 1;
9083 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
9084 ? (TrueSDVals[CC] == NewTrueVal)
9085 : (FalseSDVals[CC] == NewTrueVal);
9086 }
9087 CCMaskVal = NewCCMask;
9088 CCMaskVal &= CCValidVal;
9089 TrueVal = NewTrueVal;
9090 FalseVal = NewFalseVal;
9091 IsCombinedCCReg = true;
9092 }
9093 }
9094
9095 if (IsCombinedCCReg)
9096 return DAG.getNode(
9097 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
9098 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9099 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
9100
9101 return SDValue();
9102}
9103
9104SDValue SystemZTargetLowering::combineGET_CCMASK(
9105 SDNode *N, DAGCombinerInfo &DCI) const {
9106
9107 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
9108 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9109 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9110 if (!CCValid || !CCMask)
9111 return SDValue();
9112 int CCValidVal = CCValid->getZExtValue();
9113 int CCMaskVal = CCMask->getZExtValue();
9114
9115 SDValue Select = N->getOperand(0);
9116 if (Select->getOpcode() == ISD::TRUNCATE)
9117 Select = Select->getOperand(0);
9118 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9119 return SDValue();
9120
9121 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9122 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9123 if (!SelectCCValid || !SelectCCMask)
9124 return SDValue();
9125 int SelectCCValidVal = SelectCCValid->getZExtValue();
9126 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9127
9128 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9129 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9130 if (!TrueVal || !FalseVal)
9131 return SDValue();
9132 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9133 ;
9134 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9135 SelectCCMaskVal ^= SelectCCValidVal;
9136 else
9137 return SDValue();
9138
9139 if (SelectCCValidVal & ~CCValidVal)
9140 return SDValue();
9141 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9142 return SDValue();
9143
9144 return Select->getOperand(4);
9145}
9146
9147SDValue SystemZTargetLowering::combineIntDIVREM(
9148 SDNode *N, DAGCombinerInfo &DCI) const {
9149 SelectionDAG &DAG = DCI.DAG;
9150 EVT VT = N->getValueType(0);
9151 // In the case where the divisor is a vector of constants, a cheaper
9152 // sequence of instructions can replace the divide. BuildSDIV is called to
9153 // do this during DAG combining, but it only succeeds when it can build a
9154 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9155 // since it is not Legal but Custom it can only happen before
9156 // legalization. Therefore we must scalarize this early before Combine
9157 // 1. For widened vectors, this is already the result of type legalization.
9158 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9159 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9160 return DAG.UnrollVectorOp(N);
9161 return SDValue();
9162}
9163
9164
9165// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9166// This is closely modeled after the common-code combineShiftToMULH.
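// For example, (srl (add (mul (zext X), (zext Y)), (zext A)), 32) with
// v4i32 inputs X, Y and A can be emitted as a single VMALH, which produces
// the high halves of X*Y+A directly.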
9167SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9168 SDNode *N, DAGCombinerInfo &DCI) const {
9169 SelectionDAG &DAG = DCI.DAG;
9170 SDLoc DL(N);
9171
9172 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9173 "SRL or SRA node is required here!");
9174
9175 if (!Subtarget.hasVector())
9176 return SDValue();
9177
9178 // Check the shift amount. Proceed with the transformation if the shift
9179 // amount is constant.
9180 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9181 if (!ShiftAmtSrc)
9182 return SDValue();
9183
9184 // The operation feeding into the shift must be an add.
9185 SDValue ShiftOperand = N->getOperand(0);
9186 if (ShiftOperand.getOpcode() != ISD::ADD)
9187 return SDValue();
9188
9189 // One operand of the add must be a multiply.
9190 SDValue MulOp = ShiftOperand.getOperand(0);
9191 SDValue AddOp = ShiftOperand.getOperand(1);
9192 if (MulOp.getOpcode() != ISD::MUL) {
9193 if (AddOp.getOpcode() != ISD::MUL)
9194 return SDValue();
9195 std::swap(MulOp, AddOp);
9196 }
9197
9198 // All operands must be equivalent extend nodes.
9199 SDValue LeftOp = MulOp.getOperand(0);
9200 SDValue RightOp = MulOp.getOperand(1);
9201
9202 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9203 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9204
9205 if (!IsSignExt && !IsZeroExt)
9206 return SDValue();
9207
9208 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9209 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9210
9211 SDValue MulhRightOp;
9212 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9213 unsigned ActiveBits = IsSignExt
9214 ? Constant->getAPIntValue().getSignificantBits()
9215 : Constant->getAPIntValue().getActiveBits();
9216 if (ActiveBits > NarrowVTSize)
9217 return SDValue();
9218 MulhRightOp = DAG.getConstant(
9219 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9220 NarrowVT);
9221 } else {
9222 if (LeftOp.getOpcode() != RightOp.getOpcode())
9223 return SDValue();
9224 // Check that the two extend nodes are the same type.
9225 if (NarrowVT != RightOp.getOperand(0).getValueType())
9226 return SDValue();
9227 MulhRightOp = RightOp.getOperand(0);
9228 }
9229
9230 SDValue MulhAddOp;
9231 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9232 unsigned ActiveBits = IsSignExt
9233 ? Constant->getAPIntValue().getSignificantBits()
9234 : Constant->getAPIntValue().getActiveBits();
9235 if (ActiveBits > NarrowVTSize)
9236 return SDValue();
9237 MulhAddOp = DAG.getConstant(
9238 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9239 NarrowVT);
9240 } else {
9241 if (LeftOp.getOpcode() != AddOp.getOpcode())
9242 return SDValue();
9243 // Check that the two extend nodes are the same type.
9244 if (NarrowVT != AddOp.getOperand(0).getValueType())
9245 return SDValue();
9246 MulhAddOp = AddOp.getOperand(0);
9247 }
9248
9249 EVT WideVT = LeftOp.getValueType();
9250 // Proceed with the transformation if the wide types match.
9251 assert((WideVT == RightOp.getValueType()) &&
9252 "Cannot have a multiply node with two different operand types.");
9253 assert((WideVT == AddOp.getValueType()) &&
9254 "Cannot have an add node with two different operand types.");
9255
9256 // Proceed with the transformation if the wide type is twice as large
9257 // as the narrow type.
9258 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9259 return SDValue();
9260
9261 // Check the shift amount with the narrow type size.
9262 // Proceed with the transformation if the shift amount is the width
9263 // of the narrow type.
9264 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9265 if (ShiftAmt != NarrowVTSize)
9266 return SDValue();
9267
9268 // Proceed if we support the multiply-and-add-high operation.
9269 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9270 NarrowVT == MVT::v4i32 ||
9271 (Subtarget.hasVectorEnhancements3() &&
9272 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9273 return SDValue();
9274
9275 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9276 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9277 DL, NarrowVT, LeftOp.getOperand(0),
9278 MulhRightOp, MulhAddOp);
9279 bool IsSigned = N->getOpcode() == ISD::SRA;
9280 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9281}
9282
9283// Op is an operand of a multiplication. Check whether this can be folded
9284// into an even/odd widening operation; if so, return the opcode to be used
9285// and update Op to the appropriate sub-operand. Note that the caller must
9286// verify that *both* operands of the multiplication support the operation.
9287 static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
9288 const SystemZSubtarget &Subtarget,
9289 SDValue &Op) {
9290 EVT VT = Op.getValueType();
9291
9292 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9293 // to selecting the even or odd vector elements.
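// For example, for a v2i64 multiply whose operands are extended from a
// shuffled v4i32, a mask selecting elements <0,2,...> matches the even forms
// (VME/VMLE) and a mask selecting <1,3,...> matches the odd forms (VMO/VMLO).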
9294 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9295 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9296 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9297 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9298 unsigned NumElts = VT.getVectorNumElements();
9299 Op = Op.getOperand(0);
9300 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9301 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9302 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9303 ArrayRef<int> ShuffleMask = SVN->getMask();
9304 bool CanUseEven = true, CanUseOdd = true;
9305 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9306 if (ShuffleMask[Elt] == -1)
9307 continue;
9308 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9309 CanUseEven = false;
9310 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9311 CanUseOdd = false;
9312 }
9313 Op = Op.getOperand(0);
9314 if (CanUseEven)
9315 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9316 if (CanUseOdd)
9317 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9318 }
9319 }
9320
9321 // For z17, we can also support the v2i64->i128 case, which looks like
9322 // (sign/zero_extend (extract_vector_elt X 0/1))
9323 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9324 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9325 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9326 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9327 Op = Op.getOperand(0);
9328 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9329 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9330 Op.getOperand(1).getOpcode() == ISD::Constant) {
9331 unsigned Elem = Op.getConstantOperandVal(1);
9332 Op = Op.getOperand(0);
9333 if (Elem == 0)
9334 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9335 if (Elem == 1)
9336 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9337 }
9338 }
9339
9340 return 0;
9341}
9342
9343SDValue SystemZTargetLowering::combineMUL(
9344 SDNode *N, DAGCombinerInfo &DCI) const {
9345 SelectionDAG &DAG = DCI.DAG;
9346
9347 // Detect even/odd widening multiplication.
9348 SDValue Op0 = N->getOperand(0);
9349 SDValue Op1 = N->getOperand(1);
9350 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9351 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9352 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9353 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9354
9355 return SDValue();
9356}
9357
9358SDValue SystemZTargetLowering::combineINTRINSIC(
9359 SDNode *N, DAGCombinerInfo &DCI) const {
9360 SelectionDAG &DAG = DCI.DAG;
9361
9362 unsigned Id = N->getConstantOperandVal(1);
9363 switch (Id) {
9364 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9365 // or larger is simply a vector load.
9366 case Intrinsic::s390_vll:
9367 case Intrinsic::s390_vlrl:
9368 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9369 if (C->getZExtValue() >= 15)
9370 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9371 N->getOperand(3), MachinePointerInfo());
9372 break;
9373 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9374 case Intrinsic::s390_vstl:
9375 case Intrinsic::s390_vstrl:
9376 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9377 if (C->getZExtValue() >= 15)
9378 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9379 N->getOperand(4), MachinePointerInfo());
9380 break;
9381 }
9382
9383 return SDValue();
9384}
9385
9386SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9387 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9388 return N->getOperand(0);
9389 return N;
9390}
9391
9392 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
9393 DAGCombinerInfo &DCI) const {
9394 switch(N->getOpcode()) {
9395 default: break;
9396 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9397 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9398 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9399 case SystemZISD::MERGE_HIGH:
9400 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9401 case ISD::LOAD: return combineLOAD(N, DCI);
9402 case ISD::STORE: return combineSTORE(N, DCI);
9403 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9404 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9405 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9406 case ISD::STRICT_FP_ROUND:
9407 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9408 case ISD::STRICT_FP_EXTEND:
9409 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9410 case ISD::SINT_TO_FP:
9411 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9412 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9413 case ISD::BSWAP: return combineBSWAP(N, DCI);
9414 case ISD::SETCC: return combineSETCC(N, DCI);
9415 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9416 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9417 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9418 case ISD::SRL:
9419 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9420 case ISD::MUL: return combineMUL(N, DCI);
9421 case ISD::SDIV:
9422 case ISD::UDIV:
9423 case ISD::SREM:
9424 case ISD::UREM: return combineIntDIVREM(N, DCI);
9425 case ISD::INTRINSIC_W_CHAIN:
9426 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9427 }
9428
9429 return SDValue();
9430}
9431
9432// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9433// are for Op.
9434static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9435 unsigned OpNo) {
9436 EVT VT = Op.getValueType();
9437 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9438 APInt SrcDemE;
9439 unsigned Opcode = Op.getOpcode();
9440 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9441 unsigned Id = Op.getConstantOperandVal(0);
9442 switch (Id) {
9443 case Intrinsic::s390_vpksh: // PACKS
9444 case Intrinsic::s390_vpksf:
9445 case Intrinsic::s390_vpksg:
9446 case Intrinsic::s390_vpkshs: // PACKS_CC
9447 case Intrinsic::s390_vpksfs:
9448 case Intrinsic::s390_vpksgs:
9449 case Intrinsic::s390_vpklsh: // PACKLS
9450 case Intrinsic::s390_vpklsf:
9451 case Intrinsic::s390_vpklsg:
9452 case Intrinsic::s390_vpklshs: // PACKLS_CC
9453 case Intrinsic::s390_vpklsfs:
9454 case Intrinsic::s390_vpklsgs:
9455 // VECTOR PACK truncates the elements of two source vectors into one.
9456 SrcDemE = DemandedElts;
9457 if (OpNo == 2)
9458 SrcDemE.lshrInPlace(NumElts / 2);
9459 SrcDemE = SrcDemE.trunc(NumElts / 2);
9460 break;
9461 // VECTOR UNPACK extends half the elements of the source vector.
9462 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9463 case Intrinsic::s390_vuphh:
9464 case Intrinsic::s390_vuphf:
9465 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9466 case Intrinsic::s390_vuplhh:
9467 case Intrinsic::s390_vuplhf:
9468 SrcDemE = APInt(NumElts * 2, 0);
9469 SrcDemE.insertBits(DemandedElts, 0);
9470 break;
9471 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9472 case Intrinsic::s390_vuplhw:
9473 case Intrinsic::s390_vuplf:
9474 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9475 case Intrinsic::s390_vupllh:
9476 case Intrinsic::s390_vupllf:
9477 SrcDemE = APInt(NumElts * 2, 0);
9478 SrcDemE.insertBits(DemandedElts, NumElts);
9479 break;
9480 case Intrinsic::s390_vpdi: {
9481 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9482 SrcDemE = APInt(NumElts, 0);
9483 if (!DemandedElts[OpNo - 1])
9484 break;
9485 unsigned Mask = Op.getConstantOperandVal(3);
9486 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9487 // Demand input element 0 or 1, given by the mask bit value.
9488 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9489 break;
9490 }
9491 case Intrinsic::s390_vsldb: {
9492 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9493 assert(VT == MVT::v16i8 && "Unexpected type.");
9494 unsigned FirstIdx = Op.getConstantOperandVal(3);
9495 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9496 unsigned NumSrc0Els = 16 - FirstIdx;
9497 SrcDemE = APInt(NumElts, 0);
9498 if (OpNo == 1) {
9499 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9500 SrcDemE.insertBits(DemEls, FirstIdx);
9501 } else {
9502 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9503 SrcDemE.insertBits(DemEls, 0);
9504 }
9505 break;
9506 }
9507 case Intrinsic::s390_vperm:
9508 SrcDemE = APInt::getAllOnes(NumElts);
9509 break;
9510 default:
9511 llvm_unreachable("Unhandled intrinsic.");
9512 break;
9513 }
9514 } else {
9515 switch (Opcode) {
9516 case SystemZISD::JOIN_DWORDS:
9517 // Scalar operand.
9518 SrcDemE = APInt(1, 1);
9519 break;
9520 case SystemZISD::SELECT_CCMASK:
9521 SrcDemE = DemandedElts;
9522 break;
9523 default:
9524 llvm_unreachable("Unhandled opcode.");
9525 break;
9526 }
9527 }
9528 return SrcDemE;
9529}
9530
9531static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9532 const APInt &DemandedElts,
9533 const SelectionDAG &DAG, unsigned Depth,
9534 unsigned OpNo) {
9535 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9536 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9537 KnownBits LHSKnown =
9538 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9539 KnownBits RHSKnown =
9540 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9541 Known = LHSKnown.intersectWith(RHSKnown);
9542}
9543
9544void
9545 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
9546 KnownBits &Known,
9547 const APInt &DemandedElts,
9548 const SelectionDAG &DAG,
9549 unsigned Depth) const {
9550 Known.resetAll();
9551
9552 // Intrinsic CC result is returned in the two low bits.
9553 unsigned Tmp0, Tmp1; // not used
9554 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9555 Known.Zero.setBitsFrom(2);
9556 return;
9557 }
9558 EVT VT = Op.getValueType();
9559 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9560 return;
9561 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9562 "KnownBits does not match VT in bitwidth");
9563 assert ((!VT.isVector() ||
9564 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9565 "DemandedElts does not match VT number of elements");
9566 unsigned BitWidth = Known.getBitWidth();
9567 unsigned Opcode = Op.getOpcode();
9568 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9569 bool IsLogical = false;
9570 unsigned Id = Op.getConstantOperandVal(0);
9571 switch (Id) {
9572 case Intrinsic::s390_vpksh: // PACKS
9573 case Intrinsic::s390_vpksf:
9574 case Intrinsic::s390_vpksg:
9575 case Intrinsic::s390_vpkshs: // PACKS_CC
9576 case Intrinsic::s390_vpksfs:
9577 case Intrinsic::s390_vpksgs:
9578 case Intrinsic::s390_vpklsh: // PACKLS
9579 case Intrinsic::s390_vpklsf:
9580 case Intrinsic::s390_vpklsg:
9581 case Intrinsic::s390_vpklshs: // PACKLS_CC
9582 case Intrinsic::s390_vpklsfs:
9583 case Intrinsic::s390_vpklsgs:
9584 case Intrinsic::s390_vpdi:
9585 case Intrinsic::s390_vsldb:
9586 case Intrinsic::s390_vperm:
9587 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9588 break;
9589 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9590 case Intrinsic::s390_vuplhh:
9591 case Intrinsic::s390_vuplhf:
9592 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9593 case Intrinsic::s390_vupllh:
9594 case Intrinsic::s390_vupllf:
9595 IsLogical = true;
9596 [[fallthrough]];
9597 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9598 case Intrinsic::s390_vuphh:
9599 case Intrinsic::s390_vuphf:
9600 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9601 case Intrinsic::s390_vuplhw:
9602 case Intrinsic::s390_vuplf: {
9603 SDValue SrcOp = Op.getOperand(1);
9604 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9605 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9606 if (IsLogical) {
9607 Known = Known.zext(BitWidth);
9608 } else
9609 Known = Known.sext(BitWidth);
9610 break;
9611 }
9612 default:
9613 break;
9614 }
9615 } else {
9616 switch (Opcode) {
9617 case SystemZISD::JOIN_DWORDS:
9618 case SystemZISD::SELECT_CCMASK:
9619 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9620 break;
9621 case SystemZISD::REPLICATE: {
9622 SDValue SrcOp = Op.getOperand(0);
9623 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9624 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
9625 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9626 break;
9627 }
9628 default:
9629 break;
9630 }
9631 }
9632
9633 // Known has the width of the source operand(s). Adjust if needed to match
9634 // the passed bitwidth.
9635 if (Known.getBitWidth() != BitWidth)
9636 Known = Known.anyextOrTrunc(BitWidth);
9637}
9638
9639static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9640 const SelectionDAG &DAG, unsigned Depth,
9641 unsigned OpNo) {
9642 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9643 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9644 if (LHS == 1) return 1; // Early out.
9645 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9646 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9647 if (RHS == 1) return 1; // Early out.
9648 unsigned Common = std::min(LHS, RHS);
9649 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9650 EVT VT = Op.getValueType();
9651 unsigned VTBits = VT.getScalarSizeInBits();
9652 if (SrcBitWidth > VTBits) { // PACK
9653 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9654 if (Common > SrcExtraBits)
9655 return (Common - SrcExtraBits);
9656 return 1;
9657 }
9658 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9659 return Common;
9660}
9661
9662unsigned
9663 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
9664 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9665 unsigned Depth) const {
9666 if (Op.getResNo() != 0)
9667 return 1;
9668 unsigned Opcode = Op.getOpcode();
9669 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9670 unsigned Id = Op.getConstantOperandVal(0);
9671 switch (Id) {
9672 case Intrinsic::s390_vpksh: // PACKS
9673 case Intrinsic::s390_vpksf:
9674 case Intrinsic::s390_vpksg:
9675 case Intrinsic::s390_vpkshs: // PACKS_CC
9676 case Intrinsic::s390_vpksfs:
9677 case Intrinsic::s390_vpksgs:
9678 case Intrinsic::s390_vpklsh: // PACKLS
9679 case Intrinsic::s390_vpklsf:
9680 case Intrinsic::s390_vpklsg:
9681 case Intrinsic::s390_vpklshs: // PACKLS_CC
9682 case Intrinsic::s390_vpklsfs:
9683 case Intrinsic::s390_vpklsgs:
9684 case Intrinsic::s390_vpdi:
9685 case Intrinsic::s390_vsldb:
9686 case Intrinsic::s390_vperm:
9687 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9688 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9689 case Intrinsic::s390_vuphh:
9690 case Intrinsic::s390_vuphf:
9691 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9692 case Intrinsic::s390_vuplhw:
9693 case Intrinsic::s390_vuplf: {
9694 SDValue PackedOp = Op.getOperand(1);
9695 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9696 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9697 EVT VT = Op.getValueType();
9698 unsigned VTBits = VT.getScalarSizeInBits();
9699 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9700 return Tmp;
9701 }
9702 default:
9703 break;
9704 }
9705 } else {
9706 switch (Opcode) {
9707 case SystemZISD::SELECT_CCMASK:
9708 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9709 default:
9710 break;
9711 }
9712 }
9713
9714 return 1;
9715}
9716
9717 bool SystemZTargetLowering::
9718 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
9719 const APInt &DemandedElts, const SelectionDAG &DAG,
9720 bool PoisonOnly, unsigned Depth) const {
9721 switch (Op->getOpcode()) {
9722 case SystemZISD::PCREL_WRAPPER:
9723 case SystemZISD::PCREL_OFFSET:
9724 return true;
9725 }
9726 return false;
9727}
9728
9729unsigned
9730 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
9731 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9732 unsigned StackAlign = TFI->getStackAlignment();
9733 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9734 "Unexpected stack alignment");
9735 // The default stack probe size is 4096 if the function has no
9736 // stack-probe-size attribute.
9737 unsigned StackProbeSize =
9738 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9739 // Round down to the stack alignment.
9740 StackProbeSize &= ~(StackAlign - 1);
9741 return StackProbeSize ? StackProbeSize : StackAlign;
9742}
9743
9744//===----------------------------------------------------------------------===//
9745// Custom insertion
9746//===----------------------------------------------------------------------===//
9747
9748// Force base value Base into a register before MI. Return the register.
9749 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
9750 const SystemZInstrInfo *TII) {
9751 MachineBasicBlock *MBB = MI.getParent();
9752 MachineFunction &MF = *MBB->getParent();
9753 MachineRegisterInfo &MRI = MF.getRegInfo();
9754 
9755 if (Base.isReg()) {
9756 // Copy Base into a new virtual register to help register coalescing in
9757 // cases with multiple uses.
9758 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9759 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9760 .add(Base);
9761 return Reg;
9762 }
9763
9764 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9765 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9766 .add(Base)
9767 .addImm(0)
9768 .addReg(0);
9769 return Reg;
9770}
9771
9772// The CC operand of MI might be missing a kill marker because there
9773// were multiple uses of CC, and ISel didn't know which to mark.
9774// Figure out whether MI should have had a kill marker.
9775static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
9776 // Scan forward through BB for a use/def of CC.
9777 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
9778 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9779 const MachineInstr &MI = *miI;
9780 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9781 return false;
9782 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9783 break; // Should have kill-flag - update below.
9784 }
9785
9786 // If we hit the end of the block, check whether CC is live into a
9787 // successor.
9788 if (miI == MBB->end()) {
9789 for (const MachineBasicBlock *Succ : MBB->successors())
9790 if (Succ->isLiveIn(SystemZ::CC))
9791 return false;
9792 }
9793
9794 return true;
9795}
9796
9797// Return true if it is OK for this Select pseudo-opcode to be cascaded
9798// together with other Select pseudo-opcodes into a single basic-block with
9799// a conditional jump around it.
9800static bool isSelectPseudo(MachineInstr &MI) {
9801 switch (MI.getOpcode()) {
9802 case SystemZ::Select32:
9803 case SystemZ::Select64:
9804 case SystemZ::Select128:
9805 case SystemZ::SelectF32:
9806 case SystemZ::SelectF64:
9807 case SystemZ::SelectF128:
9808 case SystemZ::SelectVR32:
9809 case SystemZ::SelectVR64:
9810 case SystemZ::SelectVR128:
9811 return true;
9812
9813 default:
9814 return false;
9815 }
9816}
9817
9818// Helper function, which inserts PHI functions into SinkMBB:
9819// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9820// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9821static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
9822 MachineBasicBlock *TrueMBB,
9823 MachineBasicBlock *FalseMBB,
9824 MachineBasicBlock *SinkMBB) {
9825 MachineFunction *MF = TrueMBB->getParent();
9826 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
9827
9828 MachineInstr *FirstMI = Selects.front();
9829 unsigned CCValid = FirstMI->getOperand(3).getImm();
9830 unsigned CCMask = FirstMI->getOperand(4).getImm();
9831
9832 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9833
9834 // As we are creating the PHIs, we have to be careful if there is more than
9835 // one. Later Selects may reference the results of earlier Selects, but later
9836 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9837 // That also means that PHI construction must work forward from earlier to
9838 // later, and that the code must maintain a mapping from each earlier
9839 // PHI's destination register to the registers that went into that PHI.
9840 DenseMap<Register, std::pair<Register, Register>> RegRewriteTable;
9841
9842 for (auto *MI : Selects) {
9843 Register DestReg = MI->getOperand(0).getReg();
9844 Register TrueReg = MI->getOperand(1).getReg();
9845 Register FalseReg = MI->getOperand(2).getReg();
9846
9847 // If this Select we are generating is the opposite condition from
9848 // the jump we generated, then we have to swap the operands for the
9849 // PHI that is going to be generated.
9850 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9851 std::swap(TrueReg, FalseReg);
9852
9853 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9854 TrueReg = It->second.first;
9855
9856 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9857 FalseReg = It->second.second;
9858
9859 DebugLoc DL = MI->getDebugLoc();
9860 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9861 .addReg(TrueReg).addMBB(TrueMBB)
9862 .addReg(FalseReg).addMBB(FalseMBB);
9863
9864 // Add this PHI to the rewrite table.
9865 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9866 }
9867
9868 MF->getProperties().resetNoPHIs();
9869}
9870
9871MachineBasicBlock *
9872SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9873 MachineBasicBlock *BB) const {
9874 MachineFunction &MF = *BB->getParent();
9875 MachineFrameInfo &MFI = MF.getFrameInfo();
9876 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9877 assert(TFL->hasReservedCallFrame(MF) &&
9878 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9879 (void)TFL;
9880 // Get the MaxCallFrameSize value and erase MI since it serves no further
9881 // purpose as the call frame is statically reserved in the prolog. Set
9882 // AdjustsStack as MI is *not* mapped as a frame instruction.
9883 uint32_t NumBytes = MI.getOperand(0).getImm();
9884 if (NumBytes > MFI.getMaxCallFrameSize())
9885 MFI.setMaxCallFrameSize(NumBytes);
9886 MFI.setAdjustsStack(true);
9887
9888 MI.eraseFromParent();
9889 return BB;
9890}
9891
9892// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9893MachineBasicBlock *
9894SystemZTargetLowering::emitSelect(MachineInstr &MI,
9895 MachineBasicBlock *MBB) const {
9896 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9897 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9898
9899 unsigned CCValid = MI.getOperand(3).getImm();
9900 unsigned CCMask = MI.getOperand(4).getImm();
9901
9902 // If we have a sequence of Select* pseudo instructions using the
9903 // same condition code value, we want to expand all of them into
9904 // a single pair of basic blocks using the same condition.
9905 SmallVector<MachineInstr*, 8> Selects;
9906 SmallVector<MachineInstr*, 8> DbgValues;
9907 Selects.push_back(&MI);
9908 unsigned Count = 0;
9909 for (MachineInstr &NextMI : llvm::make_range(
9910 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9911 if (isSelectPseudo(NextMI)) {
9912 assert(NextMI.getOperand(3).getImm() == CCValid &&
9913 "Bad CCValid operands since CC was not redefined.");
9914 if (NextMI.getOperand(4).getImm() == CCMask ||
9915 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9916 Selects.push_back(&NextMI);
9917 continue;
9918 }
9919 break;
9920 }
9921 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9922 NextMI.usesCustomInsertionHook())
9923 break;
9924 bool User = false;
9925 for (auto *SelMI : Selects)
9926 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9927 User = true;
9928 break;
9929 }
9930 if (NextMI.isDebugInstr()) {
9931 if (User) {
9932 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9933 DbgValues.push_back(&NextMI);
9934 }
9935 } else if (User || ++Count > 20)
9936 break;
9937 }
9938
9939 MachineInstr *LastMI = Selects.back();
9940 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9941 checkCCKill(*LastMI, MBB));
9942 MachineBasicBlock *StartMBB = MBB;
9943 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9944 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9945
9946 // Unless CC was killed in the last Select instruction, mark it as
9947 // live-in to both FalseMBB and JoinMBB.
9948 if (!CCKilled) {
9949 FalseMBB->addLiveIn(SystemZ::CC);
9950 JoinMBB->addLiveIn(SystemZ::CC);
9951 }
9952
9953 // StartMBB:
9954 // BRC CCMask, JoinMBB
9955 // # fallthrough to FalseMBB
9956 MBB = StartMBB;
9957 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9958 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9959 MBB->addSuccessor(JoinMBB);
9960 MBB->addSuccessor(FalseMBB);
9961
9962 // FalseMBB:
9963 // # fallthrough to JoinMBB
9964 MBB = FalseMBB;
9965 MBB->addSuccessor(JoinMBB);
9966
9967 // JoinMBB:
9968 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9969 // ...
9970 MBB = JoinMBB;
9971 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9972 for (auto *SelMI : Selects)
9973 SelMI->eraseFromParent();
9974
9975 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9976 for (auto *DbgMI : DbgValues)
9977 MBB->splice(InsertPos, StartMBB, DbgMI);
9978
9979 return JoinMBB;
9980}
9981
9982// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9983// StoreOpcode is the store to use and Invert says whether the store should
9984// happen when the condition is false rather than true. If a STORE ON
9985// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9986MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9987 MachineBasicBlock *MBB,
9988 unsigned StoreOpcode,
9989 unsigned STOCOpcode,
9990 bool Invert) const {
9991 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9992
9993 Register SrcReg = MI.getOperand(0).getReg();
9994 MachineOperand Base = MI.getOperand(1);
9995 int64_t Disp = MI.getOperand(2).getImm();
9996 Register IndexReg = MI.getOperand(3).getReg();
9997 unsigned CCValid = MI.getOperand(4).getImm();
9998 unsigned CCMask = MI.getOperand(5).getImm();
9999 DebugLoc DL = MI.getDebugLoc();
10000
10001 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
10002
10003 // ISel pattern matching also adds a load memory operand of the same
10004 // address, so take special care to find the storing memory operand.
10005 MachineMemOperand *MMO = nullptr;
10006 for (auto *I : MI.memoperands())
10007 if (I->isStore()) {
10008 MMO = I;
10009 break;
10010 }
10011
10012 // Use STOCOpcode if possible. We could use different store patterns in
10013 // order to avoid matching the index register, but the performance trade-offs
10014 // might be more complicated in that case.
10015 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
10016 if (Invert)
10017 CCMask ^= CCValid;
10018
10019 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
10020 .addReg(SrcReg)
10021 .add(Base)
10022 .addImm(Disp)
10023 .addImm(CCValid)
10024 .addImm(CCMask)
10025 .addMemOperand(MMO);
10026
10027 MI.eraseFromParent();
10028 return MBB;
10029 }
10030
10031 // Get the condition needed to branch around the store.
10032 if (!Invert)
10033 CCMask ^= CCValid;
10034
10035 MachineBasicBlock *StartMBB = MBB;
10036 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
10037 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
10038
10039 // Unless CC was killed in the CondStore instruction, mark it as
10040 // live-in to both FalseMBB and JoinMBB.
10041 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
10042 !checkCCKill(MI, JoinMBB)) {
10043 FalseMBB->addLiveIn(SystemZ::CC);
10044 JoinMBB->addLiveIn(SystemZ::CC);
10045 }
10046
10047 // StartMBB:
10048 // BRC CCMask, JoinMBB
10049 // # fallthrough to FalseMBB
10050 MBB = StartMBB;
10051 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10052 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
10053 MBB->addSuccessor(JoinMBB);
10054 MBB->addSuccessor(FalseMBB);
10055
10056 // FalseMBB:
10057 // store %SrcReg, %Disp(%Index,%Base)
10058 // # fallthrough to JoinMBB
10059 MBB = FalseMBB;
10060 BuildMI(MBB, DL, TII->get(StoreOpcode))
10061 .addReg(SrcReg)
10062 .add(Base)
10063 .addImm(Disp)
10064 .addReg(IndexReg)
10065 .addMemOperand(MMO);
10066 MBB->addSuccessor(JoinMBB);
10067
10068 MI.eraseFromParent();
10069 return JoinMBB;
10070}
10071
10072// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
10073MachineBasicBlock *
10074SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
10075 MachineBasicBlock *MBB,
10076 bool Unsigned) const {
10077 MachineFunction &MF = *MBB->getParent();
10078 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10079 MachineRegisterInfo &MRI = MF.getRegInfo();
10080
10081 // Synthetic instruction to compare 128-bit values.
10082 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
10083 Register Op0 = MI.getOperand(0).getReg();
10084 Register Op1 = MI.getOperand(1).getReg();
10085
10086 MachineBasicBlock *StartMBB = MBB;
10087 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
10088 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
10089
10090 // StartMBB:
10091 //
10092 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
10093 // Swap the inputs to get:
10094 // CC 1 if high(Op0) > high(Op1)
10095 // CC 2 if high(Op0) < high(Op1)
10096 // CC 0 if high(Op0) == high(Op1)
10097 //
10098 // If CC != 0, we're done, so jump over the next instruction.
10099 //
10100 // VEC[L]G Op1, Op0
10101 // JNE JoinMBB
10102 // # fallthrough to HiEqMBB
10103 MBB = StartMBB;
10104 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
10105 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
10106 .addReg(Op1).addReg(Op0);
10107 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
10108 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
10109 MBB->addSuccessor(JoinMBB);
10110 MBB->addSuccessor(HiEqMBB);
10111
10112 // HiEqMBB:
10113 //
10114 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10115 // Since we already know the high parts are equal, the CC
10116 // result will only depend on the low parts:
10117 // CC 1 if low(Op0) > low(Op1)
10118 // CC 3 if low(Op0) <= low(Op1)
10119 //
10120 // VCHLGS Tmp, Op0, Op1
10121 // # fallthrough to JoinMBB
10122 MBB = HiEqMBB;
10123 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10124 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10125 .addReg(Op0).addReg(Op1);
10126 MBB->addSuccessor(JoinMBB);
10127
10128 // Mark CC as live-in to JoinMBB.
10129 JoinMBB->addLiveIn(SystemZ::CC);
10130
10131 MI.eraseFromParent();
10132 return JoinMBB;
10133}
10134
10135// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10136// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10137// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10138// whether the field should be inverted after performing BinOpcode (e.g. for
10139// NAND).
10140MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10141 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10142 bool Invert) const {
10143 MachineFunction &MF = *MBB->getParent();
10144 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10145 MachineRegisterInfo &MRI = MF.getRegInfo();
10146
10147 // Extract the operands. Base can be a register or a frame index.
10148 // Src2 can be a register or immediate.
10149 Register Dest = MI.getOperand(0).getReg();
10150 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10151 int64_t Disp = MI.getOperand(2).getImm();
10152 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10153 Register BitShift = MI.getOperand(4).getReg();
10154 Register NegBitShift = MI.getOperand(5).getReg();
10155 unsigned BitSize = MI.getOperand(6).getImm();
10156 DebugLoc DL = MI.getDebugLoc();
10157
10158 // Get the right opcodes for the displacement.
10159 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10160 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10161 assert(LOpcode && CSOpcode && "Displacement out of range");
10162
10163 // Create virtual registers for temporary results.
10164 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10165 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10166 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10167 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10168 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10169
10170 // Insert a basic block for the main loop.
10171 MachineBasicBlock *StartMBB = MBB;
10172 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10173 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10174
10175 // StartMBB:
10176 // ...
10177 // %OrigVal = L Disp(%Base)
10178 // # fall through to LoopMBB
10179 MBB = StartMBB;
10180 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10181 MBB->addSuccessor(LoopMBB);
10182
10183 // LoopMBB:
10184 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10185 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10186 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10187 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10188 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10189 // JNE LoopMBB
10190 // # fall through to DoneMBB
10191 MBB = LoopMBB;
10192 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10193 .addReg(OrigVal).addMBB(StartMBB)
10194 .addReg(Dest).addMBB(LoopMBB);
10195 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10196 .addReg(OldVal).addReg(BitShift).addImm(0);
10197 if (Invert) {
10198 // Perform the operation normally and then invert every bit of the field.
10199 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10200 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10201 // XILF with the upper BitSize bits set.
10202 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10203 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10204 } else if (BinOpcode)
10205 // A simple binary operation.
10206 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10207 .addReg(RotatedOldVal)
10208 .add(Src2);
10209 else
10210 // Use RISBG to rotate Src2 into position and use it to replace the
10211 // field in RotatedOldVal.
10212 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10213 .addReg(RotatedOldVal).addReg(Src2.getReg())
10214 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10215 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10216 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10217 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10218 .addReg(OldVal)
10219 .addReg(NewVal)
10220 .add(Base)
10221 .addImm(Disp);
10222 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10223 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10224 MBB->addSuccessor(LoopMBB);
10225 MBB->addSuccessor(DoneMBB);
10226
10227 MI.eraseFromParent();
10228 return DoneMBB;
10229}
10230
10231// Implement EmitInstrWithCustomInserter for subword pseudo
10232// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10233// instruction that should be used to compare the current field with the
10234// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10235// for when the current field should be kept.
10236MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10237 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10238 unsigned KeepOldMask) const {
10239 MachineFunction &MF = *MBB->getParent();
10240 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10241 MachineRegisterInfo &MRI = MF.getRegInfo();
10242
10243 // Extract the operands. Base can be a register or a frame index.
10244 Register Dest = MI.getOperand(0).getReg();
10245 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10246 int64_t Disp = MI.getOperand(2).getImm();
10247 Register Src2 = MI.getOperand(3).getReg();
10248 Register BitShift = MI.getOperand(4).getReg();
10249 Register NegBitShift = MI.getOperand(5).getReg();
10250 unsigned BitSize = MI.getOperand(6).getImm();
10251 DebugLoc DL = MI.getDebugLoc();
10252
10253 // Get the right opcodes for the displacement.
10254 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10255 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10256 assert(LOpcode && CSOpcode && "Displacement out of range");
10257
10258 // Create virtual registers for temporary results.
10259 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10260 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10261 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10262 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10263 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10264 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10265
10266 // Insert 3 basic blocks for the loop.
10267 MachineBasicBlock *StartMBB = MBB;
10268 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10269 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10270 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10271 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10272
10273 // StartMBB:
10274 // ...
10275 // %OrigVal = L Disp(%Base)
10276 // # fall through to LoopMBB
10277 MBB = StartMBB;
10278 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10279 MBB->addSuccessor(LoopMBB);
10280
10281 // LoopMBB:
10282 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10283 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10284 // CompareOpcode %RotatedOldVal, %Src2
10285 // BRC KeepOldMask, UpdateMBB
10286 MBB = LoopMBB;
10287 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10288 .addReg(OrigVal).addMBB(StartMBB)
10289 .addReg(Dest).addMBB(UpdateMBB);
10290 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10291 .addReg(OldVal).addReg(BitShift).addImm(0);
10292 BuildMI(MBB, DL, TII->get(CompareOpcode))
10293 .addReg(RotatedOldVal).addReg(Src2);
10294 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10295 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10296 MBB->addSuccessor(UpdateMBB);
10297 MBB->addSuccessor(UseAltMBB);
10298
10299 // UseAltMBB:
10300 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10301 // # fall through to UpdateMBB
10302 MBB = UseAltMBB;
10303 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10304 .addReg(RotatedOldVal).addReg(Src2)
10305 .addImm(32).addImm(31 + BitSize).addImm(0);
10306 MBB->addSuccessor(UpdateMBB);
10307
10308 // UpdateMBB:
10309 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10310 // [ %RotatedAltVal, UseAltMBB ]
10311 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10312 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10313 // JNE LoopMBB
10314 // # fall through to DoneMBB
10315 MBB = UpdateMBB;
10316 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10317 .addReg(RotatedOldVal).addMBB(LoopMBB)
10318 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10319 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10320 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10321 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10322 .addReg(OldVal)
10323 .addReg(NewVal)
10324 .add(Base)
10325 .addImm(Disp);
10326 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10327 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10328 MBB->addSuccessor(LoopMBB);
10329 MBB->addSuccessor(DoneMBB);
10330
10331 MI.eraseFromParent();
10332 return DoneMBB;
10333}
10334
10335// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10336// instruction MI.
10337MachineBasicBlock *
10338SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10339 MachineBasicBlock *MBB) const {
10340 MachineFunction &MF = *MBB->getParent();
10341 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10342 MachineRegisterInfo &MRI = MF.getRegInfo();
10343
10344 // Extract the operands. Base can be a register or a frame index.
10345 Register Dest = MI.getOperand(0).getReg();
10346 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10347 int64_t Disp = MI.getOperand(2).getImm();
10348 Register CmpVal = MI.getOperand(3).getReg();
10349 Register OrigSwapVal = MI.getOperand(4).getReg();
10350 Register BitShift = MI.getOperand(5).getReg();
10351 Register NegBitShift = MI.getOperand(6).getReg();
10352 int64_t BitSize = MI.getOperand(7).getImm();
10353 DebugLoc DL = MI.getDebugLoc();
10354
10355 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10356
10357 // Get the right opcodes for the displacement and zero-extension.
10358 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10359 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10360 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10361 assert(LOpcode && CSOpcode && "Displacement out of range");
10362
10363 // Create virtual registers for temporary results.
10364 Register OrigOldVal = MRI.createVirtualRegister(RC);
10365 Register OldVal = MRI.createVirtualRegister(RC);
10366 Register SwapVal = MRI.createVirtualRegister(RC);
10367 Register StoreVal = MRI.createVirtualRegister(RC);
10368 Register OldValRot = MRI.createVirtualRegister(RC);
10369 Register RetryOldVal = MRI.createVirtualRegister(RC);
10370 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10371
10372 // Insert 2 basic blocks for the loop.
10373 MachineBasicBlock *StartMBB = MBB;
10374 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10375 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10376 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10377
10378 // StartMBB:
10379 // ...
10380 // %OrigOldVal = L Disp(%Base)
10381 // # fall through to LoopMBB
10382 MBB = StartMBB;
10383 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10384 .add(Base)
10385 .addImm(Disp)
10386 .addReg(0);
10387 MBB->addSuccessor(LoopMBB);
10388
10389 // LoopMBB:
10390 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10391 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10392 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10393 // ^^ The low BitSize bits contain the field
10394 // of interest.
10395 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10396 // ^^ Replace the upper 32-BitSize bits of the
10397 // swap value with those that we loaded and rotated.
10398 // %Dest = LL[CH] %OldValRot
10399 // CR %Dest, %CmpVal
10400 // JNE DoneMBB
10401 // # Fall through to SetMBB
10402 MBB = LoopMBB;
10403 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10404 .addReg(OrigOldVal).addMBB(StartMBB)
10405 .addReg(RetryOldVal).addMBB(SetMBB);
10406 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10407 .addReg(OrigSwapVal).addMBB(StartMBB)
10408 .addReg(RetrySwapVal).addMBB(SetMBB);
10409 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10410 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10411 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10412 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10413 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10414 .addReg(OldValRot);
10415 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10416 .addReg(Dest).addReg(CmpVal);
10417 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10418 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10419 .addMBB(DoneMBB);
10420 MBB->addSuccessor(DoneMBB);
10421 MBB->addSuccessor(SetMBB);
10422
10423 // SetMBB:
10424 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10425 // ^^ Rotate the new field to its proper position.
10426 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10427 // JNE LoopMBB
10428 // # fall through to ExitMBB
10429 MBB = SetMBB;
10430 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10431 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10432 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10433 .addReg(OldVal)
10434 .addReg(StoreVal)
10435 .add(Base)
10436 .addImm(Disp);
10437 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10438 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10439 MBB->addSuccessor(LoopMBB);
10440 MBB->addSuccessor(DoneMBB);
10441
10442 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10443 // to the block after the loop. At this point, CC may have been defined
10444 // either by the CR in LoopMBB or by the CS in SetMBB.
10445 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10446 DoneMBB->addLiveIn(SystemZ::CC);
10447
10448 MI.eraseFromParent();
10449 return DoneMBB;
10450}
10451
10452// Emit a move from two GR64s to a GR128.
10453MachineBasicBlock *
10454SystemZTargetLowering::emitPair128(MachineInstr &MI,
10455 MachineBasicBlock *MBB) const {
10456 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10457 const DebugLoc &DL = MI.getDebugLoc();
10458
10459 Register Dest = MI.getOperand(0).getReg();
10460 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10461 .add(MI.getOperand(1))
10462 .addImm(SystemZ::subreg_h64)
10463 .add(MI.getOperand(2))
10464 .addImm(SystemZ::subreg_l64);
10465 MI.eraseFromParent();
10466 return MBB;
10467}
10468
10469// Emit an extension from a GR64 to a GR128. ClearEven is true
10470// if the high register of the GR128 value must be cleared or false if
10471// it's "don't care".
10472MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10473 MachineBasicBlock *MBB,
10474 bool ClearEven) const {
10475 MachineFunction &MF = *MBB->getParent();
10476 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10477 MachineRegisterInfo &MRI = MF.getRegInfo();
10478 DebugLoc DL = MI.getDebugLoc();
10479
10480 Register Dest = MI.getOperand(0).getReg();
10481 Register Src = MI.getOperand(1).getReg();
10482 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10483
10484 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10485 if (ClearEven) {
10486 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10487 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10488
10489 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10490 .addImm(0);
10491 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10492 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10493 In128 = NewIn128;
10494 }
10495 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10496 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10497
10498 MI.eraseFromParent();
10499 return MBB;
10500}
10501
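// Implement EmitInstrWithCustomInserter for the memory-to-memory pseudos
// (MVC, NC, OC, XC, CLC and the memset forms). Short constant lengths are
// expanded into straight-line instructions; longer or variable lengths use a
// 256-bytes-per-iteration loop, with any remainder handled by EXRL or by
// further straight-line instructions.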
10502MachineBasicBlock *
10503SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10504 MachineBasicBlock *MBB,
10505 unsigned Opcode, bool IsMemset) const {
10506 MachineFunction &MF = *MBB->getParent();
10507 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10508 MachineRegisterInfo &MRI = MF.getRegInfo();
10509 DebugLoc DL = MI.getDebugLoc();
10510
10511 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10512 uint64_t DestDisp = MI.getOperand(1).getImm();
10513 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10514 uint64_t SrcDisp;
10515
10516 // Fold the displacement Disp if it is out of range.
10517 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10518 if (!isUInt<12>(Disp)) {
10519 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10520 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10521 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10522 .add(Base).addImm(Disp).addReg(0);
10523 Base = MachineOperand::CreateReg(Reg, false);
10524 Disp = 0;
10525 }
10526 };
10527
10528 if (!IsMemset) {
10529 SrcBase = earlyUseOperand(MI.getOperand(2));
10530 SrcDisp = MI.getOperand(3).getImm();
10531 } else {
10532 SrcBase = DestBase;
10533 SrcDisp = DestDisp++;
10534 foldDisplIfNeeded(DestBase, DestDisp);
10535 }
10536
10537 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10538 bool IsImmForm = LengthMO.isImm();
10539 bool IsRegForm = !IsImmForm;
10540
10541 // Build and insert one Opcode of Length, with special treatment for memset.
10542 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10543 MachineBasicBlock::iterator InsPos,
10544 MachineOperand DBase, uint64_t DDisp,
10545 MachineOperand SBase, uint64_t SDisp,
10546 unsigned Length) -> void {
10547 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10548 if (IsMemset) {
10549 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10550 if (ByteMO.isImm())
10551 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10552 .add(SBase).addImm(SDisp).add(ByteMO);
10553 else
10554 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10555 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10556 if (--Length == 0)
10557 return;
10558 }
10559 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10560 .add(DBase).addImm(DDisp).addImm(Length)
10561 .add(SBase).addImm(SDisp)
10562 .setMemRefs(MI.memoperands());
10563 };
10564
10565 bool NeedsLoop = false;
10566 uint64_t ImmLength = 0;
10567 Register LenAdjReg = SystemZ::NoRegister;
10568 if (IsImmForm) {
10569 ImmLength = LengthMO.getImm();
10570 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10571 if (ImmLength == 0) {
10572 MI.eraseFromParent();
10573 return MBB;
10574 }
10575 if (Opcode == SystemZ::CLC) {
10576 if (ImmLength > 3 * 256)
10577 // A two-CLC sequence is a clear win over a loop, not least because
10578 // it needs only one branch. A three-CLC sequence needs the same
10579 // number of branches as a loop (i.e. 2), but is shorter. That
10580 // brings us to lengths greater than 768 bytes. It seems relatively
10581 // likely that a difference will be found within the first 768 bytes,
10582 // so we just optimize for the smallest number of branch
10583 // instructions, in order to avoid polluting the prediction buffer
10584 // too much.
10585 NeedsLoop = true;
10586 } else if (ImmLength > 6 * 256)
10587 // The heuristic we use is to prefer loops for anything that would
10588 // require 7 or more MVCs. With these kinds of sizes there isn't much
10589 // to choose between straight-line code and looping code, since the
10590 // time will be dominated by the MVCs themselves.
10591 NeedsLoop = true;
10592 } else {
10593 NeedsLoop = true;
10594 LenAdjReg = LengthMO.getReg();
10595 }
10596
10597 // When generating more than one CLC, all but the last will need to
10598 // branch to the end when a difference is found.
10599 MachineBasicBlock *EndMBB =
10600 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10601 ? SystemZ::splitBlockAfter(MI, MBB)
10602 : nullptr);
10603
10604 if (NeedsLoop) {
10605 Register StartCountReg =
10606 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10607 if (IsImmForm) {
10608 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10609 ImmLength &= 255;
10610 } else {
10611 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10612 .addReg(LenAdjReg)
10613 .addReg(0)
10614 .addImm(8);
10615 }
10616
10617 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10618 auto loadZeroAddress = [&]() -> MachineOperand {
10619 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10620 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10621 return MachineOperand::CreateReg(Reg, false);
10622 };
10623 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10624 DestBase = loadZeroAddress();
10625 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10626 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10627
10628 MachineBasicBlock *StartMBB = nullptr;
10629 MachineBasicBlock *LoopMBB = nullptr;
10630 MachineBasicBlock *NextMBB = nullptr;
10631 MachineBasicBlock *DoneMBB = nullptr;
10632 MachineBasicBlock *AllDoneMBB = nullptr;
10633
10634 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10635 Register StartDestReg =
10636 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10637
10638 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10639 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10640 Register ThisDestReg =
10641 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10642 Register NextSrcReg = MRI.createVirtualRegister(RC);
10643 Register NextDestReg =
10644 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10645 RC = &SystemZ::GR64BitRegClass;
10646 Register ThisCountReg = MRI.createVirtualRegister(RC);
10647 Register NextCountReg = MRI.createVirtualRegister(RC);
10648
10649 if (IsRegForm) {
10650 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10651 StartMBB = SystemZ::emitBlockAfter(MBB);
10652 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10653 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10654 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10655
10656 // MBB:
10657 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10658 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10659 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10660 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10661 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10662 .addMBB(AllDoneMBB);
10663 MBB->addSuccessor(AllDoneMBB);
10664 if (!IsMemset)
10665 MBB->addSuccessor(StartMBB);
10666 else {
10667 // MemsetOneCheckMBB:
10668 // # Jump to MemsetOneMBB for a memset of length 1, or
10669 // # fall thru to StartMBB.
10670 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10671 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10672 MBB->addSuccessor(MemsetOneCheckMBB);
10673 MBB = MemsetOneCheckMBB;
10674 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10675 .addReg(LenAdjReg).addImm(-1);
10676 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10677 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10678 .addMBB(MemsetOneMBB);
10679 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10680 MBB->addSuccessor(StartMBB, {90, 100});
10681
10682 // MemsetOneMBB:
10683 // # Jump back to AllDoneMBB after a single MVI or STC.
10684 MBB = MemsetOneMBB;
10685 insertMemMemOp(MBB, MBB->end(),
10686 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10687 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10688 1);
10689 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10690 MBB->addSuccessor(AllDoneMBB);
10691 }
10692
10693 // StartMBB:
10694 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10695 MBB = StartMBB;
10696 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10697 .addReg(StartCountReg).addImm(0);
10698 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10699 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10700 .addMBB(DoneMBB);
10701 MBB->addSuccessor(DoneMBB);
10702 MBB->addSuccessor(LoopMBB);
10703 }
10704 else {
10705 StartMBB = MBB;
10706 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10707 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10708 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10709
10710 // StartMBB:
10711 // # fall through to LoopMBB
10712 MBB->addSuccessor(LoopMBB);
10713
10714 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10715 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10716 if (EndMBB && !ImmLength)
10717 // If the loop handled the whole CLC range, DoneMBB will be empty with
10718 // CC live-through into EndMBB, so add it as live-in.
10719 DoneMBB->addLiveIn(SystemZ::CC);
10720 }
10721
10722 // LoopMBB:
10723 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10724 // [ %NextDestReg, NextMBB ]
10725 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10726 // [ %NextSrcReg, NextMBB ]
10727 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10728 // [ %NextCountReg, NextMBB ]
10729 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10730 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10731 // ( JLH EndMBB )
10732 //
10733 // The prefetch is used only for MVC. The JLH is used only for CLC.
10734 MBB = LoopMBB;
10735 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10736 .addReg(StartDestReg).addMBB(StartMBB)
10737 .addReg(NextDestReg).addMBB(NextMBB);
10738 if (!HaveSingleBase)
10739 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10740 .addReg(StartSrcReg).addMBB(StartMBB)
10741 .addReg(NextSrcReg).addMBB(NextMBB);
10742 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10743 .addReg(StartCountReg).addMBB(StartMBB)
10744 .addReg(NextCountReg).addMBB(NextMBB);
10745 if (Opcode == SystemZ::MVC)
10746 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10747 .addImm(SystemZ::PFD_WRITE)
10748 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10749 insertMemMemOp(MBB, MBB->end(),
10750 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10751 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10752 if (EndMBB) {
10753 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10754 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10755 .addMBB(EndMBB);
10756 MBB->addSuccessor(EndMBB);
10757 MBB->addSuccessor(NextMBB);
10758 }
10759
10760 // NextMBB:
10761 // %NextDestReg = LA 256(%ThisDestReg)
10762 // %NextSrcReg = LA 256(%ThisSrcReg)
10763 // %NextCountReg = AGHI %ThisCountReg, -1
10764 // CGHI %NextCountReg, 0
10765 // JLH LoopMBB
10766 // # fall through to DoneMBB
10767 //
10768 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10769 MBB = NextMBB;
10770 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10771 .addReg(ThisDestReg).addImm(256).addReg(0);
10772 if (!HaveSingleBase)
10773 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10774 .addReg(ThisSrcReg).addImm(256).addReg(0);
10775 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10776 .addReg(ThisCountReg).addImm(-1);
10777 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10778 .addReg(NextCountReg).addImm(0);
10779 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10780 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10781 .addMBB(LoopMBB);
10782 MBB->addSuccessor(LoopMBB);
10783 MBB->addSuccessor(DoneMBB);
10784
10785 MBB = DoneMBB;
10786 if (IsRegForm) {
10787 // DoneMBB:
10788 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10789 // # Use EXecute Relative Long for the remainder of the bytes. The target
10790 // instruction of the EXRL will have a length field of 1 since 0 is an
10791 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10792 // 0xff) + 1.
10793 // # Fall through to AllDoneMBB.
10794 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10795 Register RemDestReg = HaveSingleBase ? RemSrcReg
10796 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10797 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10798 .addReg(StartDestReg).addMBB(StartMBB)
10799 .addReg(NextDestReg).addMBB(NextMBB);
10800 if (!HaveSingleBase)
10801 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10802 .addReg(StartSrcReg).addMBB(StartMBB)
10803 .addReg(NextSrcReg).addMBB(NextMBB);
10804 if (IsMemset)
10805 insertMemMemOp(MBB, MBB->end(),
10806 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10807 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10808 MachineInstrBuilder EXRL_MIB =
10809 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10810 .addImm(Opcode)
10811 .addReg(LenAdjReg)
10812 .addReg(RemDestReg).addImm(DestDisp)
10813 .addReg(RemSrcReg).addImm(SrcDisp);
10814 MBB->addSuccessor(AllDoneMBB);
10815 MBB = AllDoneMBB;
10816 if (Opcode != SystemZ::MVC) {
10817 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10818 if (EndMBB)
10819 MBB->addLiveIn(SystemZ::CC);
10820 }
10821 }
10822 MF.getProperties().resetNoPHIs();
10823 }
10824
10825 // Handle any remaining bytes with straight-line code.
10826 while (ImmLength > 0) {
10827 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10828 // The previous iteration might have created out-of-range displacements.
10829 // Apply them using LA/LAY if so.
10830 foldDisplIfNeeded(DestBase, DestDisp);
10831 foldDisplIfNeeded(SrcBase, SrcDisp);
10832 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10833 DestDisp += ThisLength;
10834 SrcDisp += ThisLength;
10835 ImmLength -= ThisLength;
10836 // If there's another CLC to go, branch to the end if a difference
10837 // was found.
10838 if (EndMBB && ImmLength > 0) {
10839 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10840 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10841 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10842 .addMBB(EndMBB);
10843 MBB->addSuccessor(EndMBB);
10844 MBB->addSuccessor(NextMBB);
10845 MBB = NextMBB;
10846 }
10847 }
10848 if (EndMBB) {
10849 MBB->addSuccessor(EndMBB);
10850 MBB = EndMBB;
10851 MBB->addLiveIn(SystemZ::CC);
10852 }
10853
10854 MI.eraseFromParent();
10855 return MBB;
10856}
10857
10858// Decompose string pseudo-instruction MI into a loop that continually performs
10859// Opcode until CC != 3.
10860MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10861 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10862 MachineFunction &MF = *MBB->getParent();
10863 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10864 MachineRegisterInfo &MRI = MF.getRegInfo();
10865 DebugLoc DL = MI.getDebugLoc();
10866
10867 uint64_t End1Reg = MI.getOperand(0).getReg();
10868 uint64_t Start1Reg = MI.getOperand(1).getReg();
10869 uint64_t Start2Reg = MI.getOperand(2).getReg();
10870 uint64_t CharReg = MI.getOperand(3).getReg();
10871
10872 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10873 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10874 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10875 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10876
10877 MachineBasicBlock *StartMBB = MBB;
10878 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10879 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10880
10881 // StartMBB:
10882 // # fall through to LoopMBB
10883 MBB->addSuccessor(LoopMBB);
10884
10885 // LoopMBB:
10886 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10887 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10888 // R0L = %CharReg
10889 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10890 // JO LoopMBB
10891 // # fall through to DoneMBB
10892 //
10893 // The load of R0L can be hoisted by post-RA LICM.
10894 MBB = LoopMBB;
10895
10896 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10897 .addReg(Start1Reg).addMBB(StartMBB)
10898 .addReg(End1Reg).addMBB(LoopMBB);
10899 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10900 .addReg(Start2Reg).addMBB(StartMBB)
10901 .addReg(End2Reg).addMBB(LoopMBB);
10902 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10903 BuildMI(MBB, DL, TII->get(Opcode))
10904 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10905 .addReg(This1Reg).addReg(This2Reg);
10906 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10907 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
10908 MBB->addSuccessor(LoopMBB);
10909 MBB->addSuccessor(DoneMBB);
10910
10911 DoneMBB->addLiveIn(SystemZ::CC);
10912
10913 MI.eraseFromParent();
10914 return DoneMBB;
10915}
10916
10917// Update TBEGIN instruction with final opcode and register clobbers.
10918MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10919 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10920 bool NoFloat) const {
10921 MachineFunction &MF = *MBB->getParent();
10922 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10923 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10924
10925 // Update opcode.
10926 MI.setDesc(TII->get(Opcode));
10927
10928 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10929 // Make sure to add the corresponding GRSM bits if they are missing.
10930 uint64_t Control = MI.getOperand(2).getImm();
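 // One GRSM bit per GPR; each even/odd register pair shares the same bit.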
10931 static const unsigned GPRControlBit[16] = {
10932 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10933 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10934 };
10935 Control |= GPRControlBit[15];
10936 if (TFI->hasFP(MF))
10937 Control |= GPRControlBit[11];
10938 MI.getOperand(2).setImm(Control);
10939
10940 // Add GPR clobbers.
10941 for (int I = 0; I < 16; I++) {
10942 if ((Control & GPRControlBit[I]) == 0) {
10943 unsigned Reg = SystemZMC::GR64Regs[I];
10944 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10945 }
10946 }
10947
10948 // Add FPR/VR clobbers.
10949 if (!NoFloat && (Control & 4) != 0) {
10950 if (Subtarget.hasVector()) {
10951 for (unsigned Reg : SystemZMC::VR128Regs) {
10952 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10953 }
10954 } else {
10955 for (unsigned Reg : SystemZMC::FP64Regs) {
10956 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10957 }
10958 }
10959 }
10960
10961 return MBB;
10962}
10963
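// Replace a load-and-test compare-with-zero pseudo with the corresponding
// load-and-test instruction, which also makes the loaded value available.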
10964MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10965 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10966 MachineFunction &MF = *MBB->getParent();
10967 MachineRegisterInfo *MRI = &MF.getRegInfo();
10968 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10969 DebugLoc DL = MI.getDebugLoc();
10970
10971 Register SrcReg = MI.getOperand(0).getReg();
10972
10973 // Create new virtual register of the same class as source.
10974 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10975 Register DstReg = MRI->createVirtualRegister(RC);
10976
10977 // Replace pseudo with a normal load-and-test that models the def as
10978 // well.
10979 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10980 .addReg(SrcReg)
10981 .setMIFlags(MI.getFlags());
10982 MI.eraseFromParent();
10983
10984 return MBB;
10985}
10986
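// Expand PROBED_ALLOCA: lower the stack pointer in ProbeSize steps, touching
// each newly allocated block with a volatile load so that guard pages are
// hit, then handle the remaining bytes.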
10987MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10988 MachineInstr &MI, MachineBasicBlock *MBB) const {
10989 MachineFunction &MF = *MBB->getParent();
10990 MachineRegisterInfo *MRI = &MF.getRegInfo();
10991 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10992 DebugLoc DL = MI.getDebugLoc();
10993 const unsigned ProbeSize = getStackProbeSize(MF);
10994 Register DstReg = MI.getOperand(0).getReg();
10995 Register SizeReg = MI.getOperand(2).getReg();
10996
10997 MachineBasicBlock *StartMBB = MBB;
10998 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10999 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
11000 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
11001 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
11002 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
11003
11004 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
11005 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
11006
11007 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11008 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11009
11010 // LoopTestMBB
11011 // BRC TailTestMBB
11012 // # fallthrough to LoopBodyMBB
11013 StartMBB->addSuccessor(LoopTestMBB);
11014 MBB = LoopTestMBB;
11015 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
11016 .addReg(SizeReg)
11017 .addMBB(StartMBB)
11018 .addReg(IncReg)
11019 .addMBB(LoopBodyMBB);
11020 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
11021 .addReg(PHIReg)
11022 .addImm(ProbeSize);
11023 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11024 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
11025 .addMBB(TailTestMBB);
11026 MBB->addSuccessor(LoopBodyMBB);
11027 MBB->addSuccessor(TailTestMBB);
11028
11029 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
11030 // J LoopTestMBB
11031 MBB = LoopBodyMBB;
11032 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
11033 .addReg(PHIReg)
11034 .addImm(ProbeSize);
11035 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
11036 .addReg(SystemZ::R15D)
11037 .addImm(ProbeSize);
11038 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11039 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
11040 .setMemRefs(VolLdMMO);
11041 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
11042 MBB->addSuccessor(LoopTestMBB);
11043
11044 // TailTestMBB
11045 // BRC DoneMBB
11046 // # fallthrough to TailMBB
11047 MBB = TailTestMBB;
11048 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
11049 .addReg(PHIReg)
11050 .addImm(0);
11051 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11052 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
11053 .addMBB(DoneMBB);
11054 MBB->addSuccessor(TailMBB);
11055 MBB->addSuccessor(DoneMBB);
11056
11057 // TailMBB
11058 // # fallthrough to DoneMBB
11059 MBB = TailMBB;
11060 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
11061 .addReg(SystemZ::R15D)
11062 .addReg(PHIReg);
11063 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11064 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
11065 .setMemRefs(VolLdMMO);
11066 MBB->addSuccessor(DoneMBB);
11067
11068 // DoneMBB
11069 MBB = DoneMBB;
11070 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
11071 .addReg(SystemZ::R15D);
11072
11073 MI.eraseFromParent();
11074 return DoneMBB;
11075}
11076
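// Return the address of the backchain slot for the frame identified by the
// stack pointer value SP.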
11077SDValue SystemZTargetLowering::
11078getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
11079 MachineFunction &MF = DAG.getMachineFunction();
11080 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
11081 SDLoc DL(SP);
11082 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
11083 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
11084}
11085
11086MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
11087 MachineInstr &MI, MachineBasicBlock *MBB) const {
11088 switch (MI.getOpcode()) {
11089 case SystemZ::ADJCALLSTACKDOWN:
11090 case SystemZ::ADJCALLSTACKUP:
11091 return emitAdjCallStack(MI, MBB);
11092
11093 case SystemZ::Select32:
11094 case SystemZ::Select64:
11095 case SystemZ::Select128:
11096 case SystemZ::SelectF32:
11097 case SystemZ::SelectF64:
11098 case SystemZ::SelectF128:
11099 case SystemZ::SelectVR32:
11100 case SystemZ::SelectVR64:
11101 case SystemZ::SelectVR128:
11102 return emitSelect(MI, MBB);
11103
11104 case SystemZ::CondStore8Mux:
11105 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
11106 case SystemZ::CondStore8MuxInv:
11107 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
11108 case SystemZ::CondStore16Mux:
11109 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
11110 case SystemZ::CondStore16MuxInv:
11111 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11112 case SystemZ::CondStore32Mux:
11113 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11114 case SystemZ::CondStore32MuxInv:
11115 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11116 case SystemZ::CondStore8:
11117 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11118 case SystemZ::CondStore8Inv:
11119 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11120 case SystemZ::CondStore16:
11121 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11122 case SystemZ::CondStore16Inv:
11123 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11124 case SystemZ::CondStore32:
11125 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11126 case SystemZ::CondStore32Inv:
11127 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11128 case SystemZ::CondStore64:
11129 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11130 case SystemZ::CondStore64Inv:
11131 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11132 case SystemZ::CondStoreF32:
11133 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11134 case SystemZ::CondStoreF32Inv:
11135 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11136 case SystemZ::CondStoreF64:
11137 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11138 case SystemZ::CondStoreF64Inv:
11139 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11140
11141 case SystemZ::SCmp128Hi:
11142 return emitICmp128Hi(MI, MBB, false);
11143 case SystemZ::UCmp128Hi:
11144 return emitICmp128Hi(MI, MBB, true);
11145
11146 case SystemZ::PAIR128:
11147 return emitPair128(MI, MBB);
11148 case SystemZ::AEXT128:
11149 return emitExt128(MI, MBB, false);
11150 case SystemZ::ZEXT128:
11151 return emitExt128(MI, MBB, true);
11152
11153 case SystemZ::ATOMIC_SWAPW:
11154 return emitAtomicLoadBinary(MI, MBB, 0);
11155
11156 case SystemZ::ATOMIC_LOADW_AR:
11157 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11158 case SystemZ::ATOMIC_LOADW_AFI:
11159 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11160
11161 case SystemZ::ATOMIC_LOADW_SR:
11162 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11163
11164 case SystemZ::ATOMIC_LOADW_NR:
11165 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11166 case SystemZ::ATOMIC_LOADW_NILH:
11167 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11168
11169 case SystemZ::ATOMIC_LOADW_OR:
11170 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11171 case SystemZ::ATOMIC_LOADW_OILH:
11172 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11173
11174 case SystemZ::ATOMIC_LOADW_XR:
11175 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11176 case SystemZ::ATOMIC_LOADW_XILF:
11177 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11178
11179 case SystemZ::ATOMIC_LOADW_NRi:
11180 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11181 case SystemZ::ATOMIC_LOADW_NILHi:
11182 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11183
11184 case SystemZ::ATOMIC_LOADW_MIN:
11185 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11186 case SystemZ::ATOMIC_LOADW_MAX:
11187 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11188 case SystemZ::ATOMIC_LOADW_UMIN:
11189 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11190 case SystemZ::ATOMIC_LOADW_UMAX:
11191 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11192
11193 case SystemZ::ATOMIC_CMP_SWAPW:
11194 return emitAtomicCmpSwapW(MI, MBB);
11195 case SystemZ::MVCImm:
11196 case SystemZ::MVCReg:
11197 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11198 case SystemZ::NCImm:
11199 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11200 case SystemZ::OCImm:
11201 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11202 case SystemZ::XCImm:
11203 case SystemZ::XCReg:
11204 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11205 case SystemZ::CLCImm:
11206 case SystemZ::CLCReg:
11207 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11208 case SystemZ::MemsetImmImm:
11209 case SystemZ::MemsetImmReg:
11210 case SystemZ::MemsetRegImm:
11211 case SystemZ::MemsetRegReg:
11212 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
11213 case SystemZ::CLSTLoop:
11214 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11215 case SystemZ::MVSTLoop:
11216 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11217 case SystemZ::SRSTLoop:
11218 return emitStringWrapper(MI, MBB, SystemZ::SRST);
11219 case SystemZ::TBEGIN:
11220 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11221 case SystemZ::TBEGIN_nofloat:
11222 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11223 case SystemZ::TBEGINC:
11224 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11225 case SystemZ::LTEBRCompare_Pseudo:
11226 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11227 case SystemZ::LTDBRCompare_Pseudo:
11228 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11229 case SystemZ::LTXBRCompare_Pseudo:
11230 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11231
11232 case SystemZ::PROBED_ALLOCA:
11233 return emitProbedAlloca(MI, MBB);
11234 case SystemZ::EH_SjLj_SetJmp:
11235 return emitEHSjLjSetJmp(MI, MBB);
11236 case SystemZ::EH_SjLj_LongJmp:
11237 return emitEHSjLjLongJmp(MI, MBB);
11238
11239 case TargetOpcode::STACKMAP:
11240 case TargetOpcode::PATCHPOINT:
11241 return emitPatchPoint(MI, MBB);
11242
11243 default:
11244 llvm_unreachable("Unexpected instr type to insert");
11245 }
11246}
11247
11248// This is only used by the isel schedulers, and is needed only to prevent
11249// the compiler from crashing when list-ilp is used.
11250const TargetRegisterClass *
11251SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11252 if (VT == MVT::Untyped)
11253 return &SystemZ::ADDR128BitRegClass;
11254 return TargetLowering::getRepRegClassFor(VT);
11255}
11256
11257SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11258 SelectionDAG &DAG) const {
11259 SDLoc dl(Op);
11260 /*
11261 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11262 settings:
11263 00 Round to nearest
11264 01 Round to 0
11265 10 Round to +inf
11266 11 Round to -inf
11267
11268 FLT_ROUNDS, on the other hand, expects the following:
11269 -1 Undefined
11270 0 Round to 0
11271 1 Round to nearest
11272 2 Round to +inf
11273 3 Round to -inf
11274 */
11275
11276 // Save FPC to register.
11277 SDValue Chain = Op.getOperand(0);
11278 SDValue EFPC(
11279 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11280 Chain = EFPC.getValue(1);
11281
11282 // Transform as necessary
11283 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11284 DAG.getConstant(3, dl, MVT::i32));
11285 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11286 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11287 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11288 DAG.getConstant(1, dl, MVT::i32)));
11289
11290 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11291 DAG.getConstant(1, dl, MVT::i32));
11292 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11293
11294 return DAG.getMergeValues({RetVal, Chain}, dl);
11295}
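To make the bit twiddling above concrete, here is a small standalone sketch (illustrative only, not part of this file; the helper name fpcToFltRounds is made up) that applies the same expression, RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1, to each of the four FPC rounding-mode encodings and checks that the results match the FLT_ROUNDS values listed in the comment:

#include <cassert>

// Model of the transform applied to the two low-order FPC rounding-mode bits.
static unsigned fpcToFltRounds(unsigned FPC) {
  unsigned CWD1 = FPC & 3;
  return (CWD1 ^ (CWD1 >> 1)) ^ 1;
}

int main() {
  assert(fpcToFltRounds(0) == 1); // 00 Round to nearest -> 1
  assert(fpcToFltRounds(1) == 0); // 01 Round to 0       -> 0
  assert(fpcToFltRounds(2) == 2); // 10 Round to +inf    -> 2
  assert(fpcToFltRounds(3) == 3); // 11 Round to -inf    -> 3
  return 0;
}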
11296
11297SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11298 SelectionDAG &DAG) const {
11299 EVT VT = Op.getValueType();
11300 Op = Op.getOperand(0);
11301 EVT OpVT = Op.getValueType();
11302
11303 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11304
11305 SDLoc DL(Op);
11306
11307 // load a 0 vector for the third operand of VSUM.
11308 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11309
11310 // execute VSUM.
11311 switch (OpVT.getScalarSizeInBits()) {
11312 case 8:
11313 case 16:
11314 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11315 [[fallthrough]];
11316 case 32:
11317 case 64:
11318 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11319 DAG.getBitcast(Op.getValueType(), Zero));
11320 break;
11321 case 128:
11322 break; // VSUM over v1i128 should not happen and would be a noop
11323 default:
11324 llvm_unreachable("Unexpected scalar size.");
11325 }
11326 // Cast to original vector type, retrieve last element.
11327 return DAG.getNode(
11328 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11329 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11330}
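For reference, the operation being lowered above is just a horizontal sum of all lanes; a minimal scalar model (an illustrative sketch with a hypothetical helper name, not the lowering itself) looks like this, with the vector code replacing the loop by one or two VSUM steps plus a final element extract:

#include <array>
#include <cstddef>
#include <type_traits>

// Scalar model of integer VECREDUCE_ADD. Unsigned lane types keep the
// addition modular, matching the wrap-around behaviour of the vector node.
template <typename T, std::size_t N>
T reduceAdd(const std::array<T, N> &Lanes) {
  static_assert(std::is_unsigned_v<T>, "use an unsigned lane type");
  T Sum = 0;
  for (T Lane : Lanes)
    Sum += Lane;
  return Sum;
}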
11331
11332static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
11333 FunctionType *FT = F->getFunctionType();
11334 const AttributeList &Attrs = F->getAttributes();
11335 if (Attrs.hasRetAttrs())
11336 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11337 OS << *F->getReturnType() << " @" << F->getName() << "(";
11338 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11339 if (I)
11340 OS << ", ";
11341 OS << *FT->getParamType(I);
11342 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11343 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11344 if (ArgAttrs.hasAttribute(A))
11345 OS << " " << Attribute::getNameFromAttrKind(A);
11346 }
11347 OS << ")\n";
11348}
11349
11350bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11351 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11352 if (Itr == IsInternalCache.end())
11353 Itr = IsInternalCache
11354 .insert(std::pair<const Function *, bool>(
11355 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11356 .first;
11357 return Itr->second;
11358}
11359
11360void SystemZTargetLowering::
11361verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11362 const Function *F, SDValue Callee) const {
11363 // Temporarily only do the check when explicitly requested, until it can be
11364 // enabled by default.
11365 if (!EnableIntArgExtCheck)
11366 return;
11367
11368 bool IsInternal = false;
11369 const Function *CalleeFn = nullptr;
11370 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11371 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11372 IsInternal = isInternal(CalleeFn);
11373 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11374 errs() << "ERROR: Missing extension attribute of passed "
11375 << "value in call to function:\n" << "Callee: ";
11376 if (CalleeFn != nullptr)
11377 printFunctionArgExts(CalleeFn, errs());
11378 else
11379 errs() << "-\n";
11380 errs() << "Caller: ";
11381 printFunctionArgExts(F, errs());
11382 llvm_unreachable("");
11383 }
11384}
11385
11386void SystemZTargetLowering::
11387verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11388 const Function *F) const {
11389 // Temporarily only do the check when explicitly requested, until it can be
11390 // enabled by default.
11391 if (!EnableIntArgExtCheck)
11392 return;
11393
11394 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11395 errs() << "ERROR: Missing extension attribute of returned "
11396 << "value from function:\n";
11397 printFunctionArgExts(F, errs());
11398 llvm_unreachable("");
11399 }
11400}
11401
11402// Verify that narrow integer arguments are extended as required by the ABI.
11403// Return false if an error is found.
11404bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11405 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11406 if (!Subtarget.isTargetELF())
11407 return true;
11408
11409 if (EnableIntArgExtCheck.getNumOccurrences()) {
11410 if (!EnableIntArgExtCheck)
11411 return true;
11412 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11413 return true;
11414
11415 for (unsigned i = 0; i < Outs.size(); ++i) {
11416 MVT VT = Outs[i].VT;
11417 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11418 if (VT.isInteger()) {
11419 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11420 "Unexpected integer argument VT.");
11421 if (VT == MVT::i32 &&
11422 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11423 return false;
11424 }
11425 }
11426
11427 return true;
11428}
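As background for the check above (a hedged illustration, not taken from this file): on SystemZ ELF targets the calling convention expects integer arguments narrower than 64 bits to be passed sign- or zero-extended, so for a C prototype like the hypothetical one below the outgoing i32 argument is expected to carry an extension flag; verifyNarrowIntegerArgs reports outgoing i32 values that have neither a sign-, zero-, nor no-extension marker.

// Hypothetical example. Per the SystemZ ELF ABI, the caller is expected to
// pass 'x' extended to 64 bits (sign-extended here, since 'int' is signed).
extern int callee(int x);

int caller(int x) { return callee(x); }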
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static std::pair< SDValue, int > findCCUse(const SDValue &Val)
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:322
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:951
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:215
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:686
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:114
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
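A small sketch of the usual makeLibCall pattern, assuming it runs inside a lowering helper where DAG and Op are already in scope; the i128 -> f64 conversion is chosen purely for illustration and uses the RTLIB::getSINTTOFP helper listed further below:
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(MVT::i128, MVT::f64);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "no libcall for this type pair");
  MakeLibCallOptions CallOptions;                    // default call options
  SDValue Ops[] = {Op.getOperand(0)};
  auto [Result, OutChain] =
      makeLibCall(DAG, LC, MVT::f64, Ops, CallOptions, SDLoc(Op));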
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual results.
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:464
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
Definition ISDOpcodes.h:868
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:463
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector result.
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
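A brief example of these two condition-code helpers (the condition chosen is arbitrary):
  ISD::CondCode CC = ISD::SETLT;                          // X < Y
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i64); // !(X < Y), i.e. SETGE
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);  // X < Y == Y > X, i.e. SETGT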
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
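These node predicates are typically the first check in a DAG combine; a minimal hedged sketch, where N is some SDNode* under inspection:
  if (ISD::isNormalLoad(N) && cast<LoadSDNode>(N)->isSimple()) {
    // A plain, unindexed, non-extending, non-volatile, non-atomic load:
    // safe to consider for folding into a larger memory operation.
  }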
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
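These CC masks compose with bitwise OR over the four condition-code values; a small example relying only on the definitions in SystemZ.h:
  // CC 1 ("less") or CC 2 ("greater"): any ordered, unequal compare outcome.
  unsigned CCMask = SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT;
  // This combination is exactly SystemZ::CCMASK_CMP_NE.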
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
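These MathExtras.h templates are the usual way to range-check candidate immediates; a couple of concrete evaluations:
  int64_t Imm = -200000;
  bool S20 = llvm::isInt<20>(Imm);           // true: fits a signed 20-bit field
  bool U12 = llvm::isUInt<12>(Imm);          // false: negative values never fit an unsigned field
  unsigned CeilLog = llvm::Log2_32_Ceil(33); // 6: the smallest power of two >= 33 is 64 = 2^6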
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
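For concreteness, sample evaluations of these bit utilities from llvm/ADT/bit.h:
  int TZ = llvm::countr_zero(0x50u);      // 4: lowest set bit of 0b1010000 is bit 4
  int LZ = llvm::countl_zero(0x50u);      // 25: in a 32-bit value the top set bit is bit 6
  unsigned Ceil = llvm::bit_ceil(0x50u);  // 0x80: smallest power of two >= 0x50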
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
#define EQ(a, b)
Definition regexec.c:65
#define NC
Definition regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
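A short illustration of the EVT queries above; Ctx stands for an LLVMContext already in scope (for example *DAG.getContext() inside a lowering hook):
  EVT VecVT = EVT::getVectorVT(Ctx, MVT::i32, 4);       // v4i32
  EVT EltVT = VecVT.getVectorElementType();             // i32
  uint64_t Bits = VecVT.getFixedSizeInBits();           // 128
  bool IntVec = VecVT.isVector() && VecVT.isInteger();  // true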
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
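A minimal sketch of how these KnownBits operations compose (the bit values are chosen purely for illustration):
  KnownBits Known(8);                      // 8-bit value, nothing known yet
  Known.Zero.setHighBits(4);               // upper nibble known to be zero
  Known.One.setBit(0);                     // low bit known to be one
  KnownBits Wide = Known.zext(16);         // zero extension: new high bits known zero
  KnownBits Back = Wide.anyextOrTrunc(8);  // truncate back to 8 bits
  APInt Max = Known.getMaxValue();         // 0x0f: every bit not known zero set to one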
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
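These factory functions typically supply the memory-operand argument of DAG memory nodes; a hedged sketch in a lowering context where DAG, DL, Chain, PtrVT, and a frame index FI are assumed to be in scope:
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Load = DAG.getLoad(
      MVT::i64, DL, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));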
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
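These setters chain together when building a call during custom lowering; a minimal hedged sketch in which Chain, DL, RetTy, Callee, and Args are placeholders supplied by the surrounding code:
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setDiscardResult(false);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // CallResult.first is the call's return value, CallResult.second the output chain.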
This structure is used to pass arguments to makeLibCall function.