opt-shuffle-splat.mir 7.3 KB
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -O1 -verify-machineinstrs %s -o - | FileCheck %s
---
name:            splat_4xi32
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $w0

    ; CHECK-LABEL: name: splat_4xi32
    ; CHECK: liveins: $w0
    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
    ; CHECK: [[DUPv4i32gpr:%[0-9]+]]:fpr128 = DUPv4i32gpr [[COPY]]
    ; CHECK: $q0 = COPY [[DUPv4i32gpr]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:gpr(s32) = COPY $w0
    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
    $q0 = COPY %4(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xi64
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $x0

    ; CHECK-LABEL: name: splat_2xi64
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
    ; CHECK: [[DUPv2i64gpr:%[0-9]+]]:fpr128 = DUPv2i64gpr [[COPY]]
    ; CHECK: $q0 = COPY [[DUPv2i64gpr]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:gpr(s64) = COPY $x0
    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xi32
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $w0

    ; CHECK-LABEL: name: splat_2xi32
    ; CHECK: liveins: $w0
    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
    ; CHECK: [[DUPv2i32gpr:%[0-9]+]]:fpr64 = DUPv2i32gpr [[COPY]]
    ; CHECK: $d0 = COPY [[DUPv2i32gpr]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:gpr(s32) = COPY $w0
    %2:fpr(<2 x s32>) = G_IMPLICIT_DEF
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
    %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
    $d0 = COPY %4(<2 x s32>)
    RET_ReallyLR implicit $d0

...
---
name:            splat_4xf32
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $s0

    ; CHECK-LABEL: name: splat_4xf32
    ; CHECK: liveins: $s0
    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
    ; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
    ; CHECK: $q0 = COPY [[DUPv4i32lane]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:fpr(s32) = COPY $s0
    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
    $q0 = COPY %4(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xf64
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $d0

    ; CHECK-LABEL: name: splat_2xf64
    ; CHECK: liveins: $d0
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:fpr(s64) = COPY $d0
    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xf32
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $s0

    ; CHECK-LABEL: name: splat_2xf32
    ; CHECK: liveins: $s0
    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
    ; CHECK: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
    ; CHECK: $d0 = COPY [[DUPv2i32lane]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(s32) = COPY $s0
    %2:fpr(<2 x s32>) = G_IMPLICIT_DEF
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
    %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
    $d0 = COPY %4(<2 x s32>)
    RET_ReallyLR implicit $d0

...
---
name:            splat_2xf64_copies
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $d0

    ; This test is exactly the same as splat_2xf64, except it adds two copies.
    ; These copies shouldn't get in the way of matching the dup pattern.
    ; CHECK-LABEL: name: splat_2xf64_copies
    ; CHECK: liveins: $d0
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:fpr(s64) = COPY $d0
    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
    %6:fpr(<2 x s64>) = COPY %2
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32)
    %7:fpr(<2 x s64>) = COPY %1
    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, shufflemask(0, 0)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            not_all_zeros
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $x0
    ; Make sure that we don't do the optimization when it's not all zeroes.
    ; CHECK-LABEL: name: not_all_zeros
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSvi64gpr:%[0-9]+]]:fpr128 = INSvi64gpr [[DEF]], 0, [[COPY]]
    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[INSvi64gpr]], %subreg.qsub0, [[DEF]], %subreg.qsub1
    ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]]
    ; CHECK: $q0 = COPY [[TBLv16i8Two]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:gpr(s64) = COPY $x0
    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
    %3:gpr(s32) = G_CONSTANT i32 0
    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 1)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0