//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include <climits>
#include <memory>
#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"
namespace llvm {
class CallLowering;
class GlobalValue;
class InstructionSelector;
class LegalizerInfo;
class RegisterBankInfo;
class StringRef;
class TargetMachine;
/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {
enum class Style {
StubPIC, // Used on i386-darwin in PIC mode.
GOT,     // Used on 32-bit ELF when in PIC mode.
RIPRel,  // Used on X86-64 when in PIC mode.
None     // Set when not in PIC mode.
};
} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
public:
// NOTE: Do not add anything new to this list. Coarse, CPU name based flags
// are not a good idea. We should be migrating away from these.
enum X86ProcFamilyEnum {
Others,
IntelAtom,
IntelSLM
};
protected:
enum X86SSEEnum {
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
enum X863DNowEnum {
NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
};
/// X86 processor family: Intel Atom, Intel SLM, and others.
X86ProcFamilyEnum X86ProcFamily = Others;
/// Which PIC style to use
PICStyles::Style PICStyle;
const TargetMachine &TM;
/// Highest level of SSE/AVX support: SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42,
/// AVX, AVX2, AVX512F, or none.
X86SSEEnum X86SSELevel = NoSSE;
/// MMX, 3DNow, 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel = NoThreeDNow;
/// True if the processor supports X87 instructions.
bool HasX87 = false;
/// True if the processor supports CMPXCHG8B.
bool HasCmpxchg8b = false;
/// True if this processor has the NOPL instruction
/// (generally Pentium Pro+).
bool HasNOPL = false;
/// True if this processor has conditional move instructions
/// (generally Pentium Pro+).
bool HasCMov = false;
/// True if the processor supports X86-64 instructions.
bool HasX86_64 = false;
/// True if the processor supports POPCNT.
bool HasPOPCNT = false;
/// True if the processor supports SSE4A instructions.
bool HasSSE4A = false;
/// Target has AES instructions
bool HasAES = false;
bool HasVAES = false;
/// Target has FXSAVE/FXRESTOR instructions
bool HasFXSR = false;
/// Target has XSAVE instructions
bool HasXSAVE = false;
/// Target has XSAVEOPT instructions
bool HasXSAVEOPT = false;
/// Target has XSAVEC instructions
bool HasXSAVEC = false;
/// Target has XSAVES instructions
bool HasXSAVES = false;
/// Target has carry-less multiplication
bool HasPCLMUL = false;
bool HasVPCLMULQDQ = false;
/// Target has Galois Field Arithmetic instructions
bool HasGFNI = false;
/// Target has 3-operand fused multiply-add
bool HasFMA = false;
/// Target has 4-operand fused multiply-add
bool HasFMA4 = false;
/// Target has XOP instructions
bool HasXOP = false;
/// Target has TBM instructions.
bool HasTBM = false;
/// Target has LWP instructions
bool HasLWP = false;
/// True if the processor has the MOVBE instruction.
bool HasMOVBE = false;
/// True if the processor has the RDRAND instruction.
bool HasRDRAND = false;
/// Processor has 16-bit floating point conversion instructions.
bool HasF16C = false;
/// Processor has FS/GS base instructions.
bool HasFSGSBase = false;
/// Processor has LZCNT instruction.
bool HasLZCNT = false;
/// Processor has BMI1 instructions.
bool HasBMI = false;
/// Processor has BMI2 instructions.
bool HasBMI2 = false;
/// Processor has VBMI instructions.
bool HasVBMI = false;
/// Processor has VBMI2 instructions.
bool HasVBMI2 = false;
/// Processor has Integer Fused Multiply Add
bool HasIFMA = false;
/// Processor has RTM instructions.
bool HasRTM = false;
/// Processor has ADX instructions.
bool HasADX = false;
/// Processor has SHA instructions.
bool HasSHA = false;
/// Processor has PRFCHW instructions.
bool HasPRFCHW = false;
/// Processor has RDSEED instructions.
bool HasRDSEED = false;
/// Processor has LAHF/SAHF instructions.
bool HasLAHFSAHF = false;
/// Processor has MONITORX/MWAITX instructions.
bool HasMWAITX = false;
/// Processor has Cache Line Zero instruction
bool HasCLZERO = false;
/// Processor has Cache Line Demote instruction
bool HasCLDEMOTE = false;
/// Processor has MOVDIRI instruction (direct store integer).
bool HasMOVDIRI = false;
/// Processor has MOVDIR64B instruction (direct store 64 bytes).
bool HasMOVDIR64B = false;
/// Processor has ptwrite instruction.
bool HasPTWRITE = false;
/// Processor has Prefetch with intent to Write instruction
bool HasPREFETCHWT1 = false;
/// True if SHLD instructions are slow.
bool IsSHLDSlow = false;
/// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
/// PMULUDQ.
bool IsPMULLDSlow = false;
/// True if the PMADDWD instruction is slow compared to PMULLD.
bool IsPMADDWDSlow = false;
/// True if unaligned 16-byte memory accesses are slow.
bool IsUAMem16Slow = false;
/// True if unaligned 32-byte memory accesses are slow.
bool IsUAMem32Slow = false;
/// True if SSE operations can have unaligned memory operands.
/// This may require setting a configuration bit in the processor.
bool HasSSEUnalignedMem = false;
/// True if this processor has the CMPXCHG16B instruction;
/// this is true for most x86-64 chips, but not the first AMD chips.
bool HasCmpxchg16b = false;
/// True if the LEA instruction should be used for adjusting
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP = false;
/// True if POPCNT instruction has a false dependency on the destination register.
bool HasPOPCNTFalseDeps = false;
/// True if LZCNT/TZCNT instructions have a false dependency on the destination register.
bool HasLZCNTFalseDeps = false;
/// True if it's preferable to combine to a single shuffle using a variable
/// mask over multiple fixed shuffles.
bool HasFastVariableShuffle = false;
/// True if vzeroupper instructions should be inserted after code that uses
/// ymm or zmm registers.
bool InsertVZEROUPPER = false;
/// True if there is no performance penalty for writing NOPs with up to
/// 7 bytes.
bool HasFast7ByteNOP = false;
/// True if there is no performance penalty for writing NOPs with up to
/// 11 bytes.
bool HasFast11ByteNOP = false;
/// True if there is no performance penalty for writing NOPs with up to
/// 15 bytes.
bool HasFast15ByteNOP = false;
/// True if gather is reasonably fast. This is true for Skylake client and
/// all AVX-512 CPUs.
bool HasFastGather = false;
/// True if hardware SQRTSS instruction is at least as fast (latency) as
/// RSQRTSS followed by a Newton-Raphson iteration.
bool HasFastScalarFSQRT = false;
/// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
/// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
bool HasFastVectorFSQRT = false;
/// True if 8-bit divisions are significantly faster than
/// 32-bit divisions and should be used when possible.
bool HasSlowDivide32 = false;
/// True if 32-bit divisions are significantly faster than
/// 64-bit divisions and should be used when possible.
bool HasSlowDivide64 = false;
/// True if LZCNT instruction is fast.
bool HasFastLZCNT = false;
/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate = false;
/// True if the processor supports macrofusion.
bool HasMacroFusion = false;
/// True if the processor supports branch fusion.
bool HasBranchFusion = false;
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB = false;
/// True if short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions = false;
/// True if two memory operand instructions should use a temporary register
/// instead.
bool SlowTwoMemOps = false;
/// True if the LEA instruction inputs have to be ready at address generation
/// (AG) time.
bool LEAUsesAG = false;
/// True if the LEA instruction with certain arguments is slow
bool SlowLEA = false;
/// True if the LEA instruction has all three source operands: base, index,
/// and offset, or if the LEA instruction uses base and index registers where
/// the base is EBP, RBP, or R13.
bool Slow3OpsLEA = false;
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec = false;
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI = false;
/// Processor has AVX-512 Exponential and Reciprocal Instructions
bool HasERI = false;
/// Processor has AVX-512 Conflict Detection Instructions
bool HasCDI = false;
/// Processor has AVX-512 population count Instructions
bool HasVPOPCNTDQ = false;
/// Processor has AVX-512 Doubleword and Quadword instructions
bool HasDQI = false;
/// Processor has AVX-512 Byte and Word instructions
bool HasBWI = false;
/// Processor has AVX-512 Vector Length eXtensions
bool HasVLX = false;
/// Processor has PKU extensions
bool HasPKU = false;
/// Processor has AVX-512 Vector Neural Network Instructions
bool HasVNNI = false;
/// Processor has AVX-512 bfloat16 floating-point extensions
bool HasBF16 = false;
/// Processor supports ENQCMD instructions
bool HasENQCMD = false;
/// Processor has AVX-512 Bit Algorithms instructions
bool HasBITALG = false;
/// Processor has AVX-512 vp2intersect instructions
bool HasVP2INTERSECT = false;
/// Deprecated flag for MPX instructions.
bool DeprecatedHasMPX = false;
/// Processor supports CET SHSTK - Control-Flow Enforcement Technology
/// using Shadow Stack
bool HasSHSTK = false;
/// Processor supports Invalidate Process-Context Identifier
bool HasINVPCID = false;
/// Processor has Software Guard Extensions
bool HasSGX = false;
/// Processor supports Flush Cache Line instruction
bool HasCLFLUSHOPT = false;
/// Processor supports Cache Line Write Back instruction
bool HasCLWB = false;
/// Processor supports Write Back No Invalidate instruction
bool HasWBNOINVD = false;
/// Processor supports RDPID instruction
bool HasRDPID = false;
/// Processor supports WaitPKG instructions
bool HasWAITPKG = false;
/// Processor supports PCONFIG instruction
bool HasPCONFIG = false;
/// Processor supports SERIALIZE instruction
bool HasSERIALIZE = false;
/// Processor supports TSXLDTRK instruction
bool HasTSXLDTRK = false;
/// Processor has AMX support
bool HasAMXTILE = false;
bool HasAMXBF16 = false;
bool HasAMXINT8 = false;
/// Processor has a single uop BEXTR implementation.
bool HasFastBEXTR = false;
/// Try harder to combine to horizontal vector ops if they are fast.
bool HasFastHorizontalOps = false;
/// Prefer a left/right scalar logical shifts pair over a shift+and pair.
bool HasFastScalarShiftMasks = false;
/// Prefer a left/right vector logical shifts pair over a shift+and pair.
bool HasFastVectorShiftMasks = false;
/// Use a retpoline thunk rather than indirect calls to block speculative
/// execution.
bool UseRetpolineIndirectCalls = false;
/// Use a retpoline thunk or remove any indirect branch to block speculative
/// execution.
bool UseRetpolineIndirectBranches = false;
/// Deprecated flag, query `UseRetpolineIndirectCalls` and
/// `UseRetpolineIndirectBranches` instead.
bool DeprecatedUseRetpoline = false;
/// When using a retpoline thunk, call an externally provided thunk rather
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
/// Prevent generation of indirect call/branch instructions from memory,
/// and force all indirect call/branch instructions from a register to be
/// preceded by an LFENCE. Also decompose RET instructions into a
/// POP+LFENCE+JMP sequence.
bool UseLVIControlFlowIntegrity = false;
/// Enable Speculative Execution Side Effect Suppression
bool UseSpeculativeExecutionSideEffectSuppression = false;
/// Insert LFENCE instructions to prevent data speculatively injected into
/// loads from being used maliciously.
bool UseLVILoadHardening = false;
/// Use software floating point for code generation.
bool UseSoftFloat = false;
/// Use alias analysis during code generation.
bool UseAA = false;
/// The minimum alignment known to hold for the stack frame on entry to the
/// function, which must be maintained by every function.
Align stackAlignment = Align(4);
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
// FIXME: this is a known good value for Yonah. How about others?
unsigned MaxInlineSizeThreshold = 128;
/// Indicates target prefers 128 bit instructions.
bool Prefer128Bit = false;
/// Indicates target prefers 256 bit instructions.
bool Prefer256Bit = false;
/// Indicates target prefers AVX512 mask registers.
bool PreferMaskRegisters = false;
/// Threeway branch is profitable in this subtarget.
bool ThreewayBranchProfitable = false;
/// Use Goldmont specific floating point div/sqrt costs.
bool UseGLMDivSqrtCosts = false;
/// What processor and OS we're targeting.
Triple TargetTriple;
/// GlobalISel related APIs.
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
std::unique_ptr<InstructionSelector> InstSelector;
private:
/// Override the stack alignment.
MaybeAlign StackAlignOverride;
/// Preferred vector width from function attribute.
unsigned PreferVectorWidthOverride;
/// Resolved preferred vector width from function attribute and subtarget
/// features.
unsigned PreferVectorWidth = UINT32_MAX;
/// Required vector width from function attribute.
unsigned RequiredVectorWidth;
/// True if compiling for 64-bit, false for 16-bit or 32-bit.
bool In64BitMode;
/// True if compiling for 32-bit, false for 16-bit or 64-bit.
bool In32BitMode;
/// True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
/// Contains the overhead of gather/scatter instructions.
int GatherOverhead = 1024;
int ScatterOverhead = 1024;
X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
// X86TargetLowering needs.
X86InstrInfo InstrInfo;
X86TargetLowering TLInfo;
X86FrameLowering FrameLowering;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
unsigned PreferVectorWidthOverride,
unsigned RequiredVectorWidth);
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const X86FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
const X86RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
/// Returns the minimum alignment known to hold for the stack frame on entry
/// to the function, which must be maintained by every function for this
/// subtarget.
Align getStackAlignment() const { return stackAlignment; }
/// Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
/// ParseSubtargetFeatures - Parses the features string, setting the specified
/// subtarget options. The definition of this function is auto-generated by
/// tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// Methods used by Global ISel
const CallLowering *getCallLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
private:
/// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
return In64BitMode;
}
bool is32Bit() const {
return In32BitMode;
}
bool is16Bit() const {
return In16BitMode;
}
/// Is this x86_64 with the ILP32 programming model (x32 ABI)?
bool isTarget64BitILP32() const {
return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32 ||
TargetTriple.isOSNaCl());
}
/// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
bool isTarget64BitLP64() const {
return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 &&
!TargetTriple.isOSNaCl());
}
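// Illustrative mapping for the two predicates above: the triple
// x86_64-unknown-linux-gnux32 selects the x32 ABI (isTarget64BitILP32()),
// while x86_64-unknown-linux-gnu selects LP64 (isTarget64BitLP64()).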
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
bool hasX87() const { return HasX87; }
bool hasCmpxchg8b() const { return HasCmpxchg8b; }
bool hasNOPL() const { return HasNOPL; }
// SSE codegen depends on cmovs, and all SSE1+ processors support them.
// All 64-bit processors support cmov.
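// For example, a subtarget with the CMOV feature disabled but SSE1 enabled
// still reports hasCMov() == true, because the SSE level implies CMOV.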
bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
bool hasSSE41() const { return X86SSELevel >= SSE41; }
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
bool hasAVX512() const { return X86SSELevel >= AVX512F; }
bool hasInt256() const { return hasAVX2(); }
bool hasSSE4A() const { return HasSSE4A; }
bool hasMMX() const { return X863DNowLevel >= MMX; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAES() const { return HasAES; }
bool hasVAES() const { return HasVAES; }
bool hasFXSR() const { return HasFXSR; }
bool hasXSAVE() const { return HasXSAVE; }
bool hasXSAVEOPT() const { return HasXSAVEOPT; }
bool hasXSAVEC() const { return HasXSAVEC; }
bool hasXSAVES() const { return HasXSAVES; }
bool hasPCLMUL() const { return HasPCLMUL; }
bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
bool hasGFNI() const { return HasGFNI; }
// Prefer FMA4 to FMA - it's better for commutation/memory folding and
// has equal or better performance on all supported targets.
bool hasFMA() const { return HasFMA; }
bool hasFMA4() const { return HasFMA4; }
bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
bool hasXOP() const { return HasXOP; }
bool hasTBM() const { return HasTBM; }
bool hasLWP() const { return HasLWP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
bool hasFSGSBase() const { return HasFSGSBase; }
bool hasLZCNT() const { return HasLZCNT; }
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasVBMI() const { return HasVBMI; }
bool hasVBMI2() const { return HasVBMI2; }
bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
bool hasPrefetchW() const {
// The PREFETCHW instruction was added with 3DNow but later CPUs gave it
// its own CPUID bit as part of deprecating 3DNow. Intel eventually added
// it and KNL has another that prefetches to L2 cache. We assume the
// L1 version exists if the L2 version does.
return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
}
bool hasSSEPrefetch() const {
// We implicitly enable these when we have a write prefetch supporting the
// cache level, OR if we have prfchw but don't already have a read prefetch
// from 3dnow.
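// For example, a 3DNow!-only CPU with PRFCHW but no SSE returns false here:
// its 3DNow! PREFETCH already provides the read prefetch, so PRFCHW alone
// doesn't imply the SSE prefetch family.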
return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
}
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool hasMWAITX() const { return HasMWAITX; }
bool hasCLZERO() const { return HasCLZERO; }
bool hasCLDEMOTE() const { return HasCLDEMOTE; }
bool hasMOVDIRI() const { return HasMOVDIRI; }
bool hasMOVDIR64B() const { return HasMOVDIR64B; }
bool hasPTWRITE() const { return HasPTWRITE; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isPMULLDSlow() const { return IsPMULLDSlow; }
bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
int getGatherOverhead() const { return GatherOverhead; }
int getScatterOverhead() const { return ScatterOverhead; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
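// CMPXCHG16B needs a REX.W prefix and is therefore only encodable in 64-bit
// mode, hence the is64Bit() guard below.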
bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
bool hasFastVariableShuffle() const {
return HasFastVariableShuffle;
}
bool insertVZEROUPPER() const { return InsertVZEROUPPER; }
bool hasFastGather() const { return HasFastGather; }
bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasFastBEXTR() const { return HasFastBEXTR; }
bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasBranchFusion() const { return HasBranchFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
bool slowTwoMemOps() const { return SlowTwoMemOps; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
bool slow3OpsLEA() const { return Slow3OpsLEA; }
bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
bool hasVNNI() const { return HasVNNI; }
bool hasBF16() const { return HasBF16; }
bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
bool hasBITALG() const { return HasBITALG; }
bool hasSHSTK() const { return HasSHSTK; }
bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
bool hasCLWB() const { return HasCLWB; }
bool hasWBNOINVD() const { return HasWBNOINVD; }
bool hasRDPID() const { return HasRDPID; }
bool hasWAITPKG() const { return HasWAITPKG; }
bool hasPCONFIG() const { return HasPCONFIG; }
bool hasSGX() const { return HasSGX; }
bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
bool hasINVPCID() const { return HasINVPCID; }
bool hasENQCMD() const { return HasENQCMD; }
bool hasSERIALIZE() const { return HasSERIALIZE; }
bool hasTSXLDTRK() const { return HasTSXLDTRK; }
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
bool useRetpolineIndirectBranches() const {
return UseRetpolineIndirectBranches;
}
bool hasAMXTILE() const { return HasAMXTILE; }
bool hasAMXBF16() const { return HasAMXBF16; }
bool hasAMXINT8() const { return HasAMXINT8; }
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
// These are generic getters that OR together all of the thunk types
// supported by the subtarget. Therefore useIndirectThunk*() will return true
// if any respective thunk feature is enabled.
bool useIndirectThunkCalls() const {
return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
}
bool useIndirectThunkBranches() const {
return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
}
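// For example, enabling either retpoline indirect calls or LVI control-flow
// integrity makes useIndirectThunkCalls() return true, so both mitigations
// share the same thunk-based lowering path.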
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
bool useLVILoadHardening() const { return UseLVILoadHardening; }
bool useSpeculativeExecutionSideEffectSuppression() const {
return UseSpeculativeExecutionSideEffectSuppression;
}
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
// Helper functions to determine when we should allow widening to 512-bit
// during codegen.
// TODO: Currently we're always allowing widening on CPUs without VLX,
// because for many cases we don't have a better option.
bool canExtendTo512DQ() const {
return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
}
bool canExtendTo512BW() const {
return hasBWI() && canExtendTo512DQ();
}
// If there are no 512-bit vectors and we prefer not to use 512-bit registers,
// disable them in the legalizer.
bool useAVX512Regs() const {
return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
}
bool useBWIRegs() const {
return hasBWI() && useAVX512Regs();
}
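// Worked example of the widening logic above: on an AVX512VL-capable CPU
// with "prefer-vector-width=256", canExtendTo512DQ() is false, so
// useAVX512Regs() returns true only when the function's required vector
// width exceeds 256 bits.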
bool isXRaySupported() const override { return is64Bit(); }
X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
/// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
bool useAA() const override { return UseAA; }
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
/// no-sse2). There isn't any reason to disable it if the target processor
/// supports it.
bool hasMFence() const { return hasSSE2() || is64Bit(); }
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetWindowsMSVC() const {
return TargetTriple.isWindowsMSVCEnvironment();
}
bool isTargetWindowsCoreCLR() const {
return TargetTriple.isWindowsCoreCLREnvironment();
}
bool isTargetWindowsCygwin() const {
return TargetTriple.isWindowsCygwinEnvironment();
}
bool isTargetWindowsGNU() const {
return TargetTriple.isWindowsGNUEnvironment();
}
bool isTargetWindowsItanium() const {
return TargetTriple.isWindowsItaniumEnvironment();
}
bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
bool isOSWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
bool isPICStyleStubPIC() const {
return PICStyle == PICStyles::Style::StubPIC;
}
bool isPositionIndependent() const;
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
// On Win64, all these conventions just use the default convention.
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Tail:
case CallingConv::Swift:
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_ThisCall:
case CallingConv::X86_VectorCall:
case CallingConv::Intel_OCL_BI:
return isTargetWin64();
// This convention allows using the Win64 convention on other targets.
case CallingConv::Win64:
return true;
// This convention allows using the SysV convention on Windows targets.
case CallingConv::X86_64_SysV:
return false;
// Otherwise, who knows what this is.
default:
return false;
}
}
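// For example, a function marked CallingConv::Win64 uses the Win64 convention
// even when targeting Linux, while CallingConv::X86_64_SysV keeps the SysV
// convention even on Windows.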
/// Classify a global variable reference for the current subtarget according
/// to how we should reference it in a non-pcrel context.
unsigned char classifyLocalReference(const GlobalValue *GV) const;
unsigned char classifyGlobalReference(const GlobalValue *GV,
const Module &M) const;
unsigned char classifyGlobalReference(const GlobalValue *GV) const;
/// Classify a global function reference for the current subtarget.
unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
const Module &M) const;
unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
/// Classify a blockaddress reference for the current subtarget according to
/// how we should reference it in a non-pcrel context.
unsigned char classifyBlockAddressReference() const;
/// Return true if the subtarget allows calls to an immediate address.
bool isLegalToCallImmediateAddr() const;
/// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
return useIndirectThunkBranches();
}
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
bool enableEarlyIfConversion() const override;
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
&Mutations) const override;
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
bool enableAdvancedRASplitCost() const override { return true; }
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H