# bottleneck-analysis.s 8.52 KB
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake  -bottleneck-analysis < %s | FileCheck %s

# Loop kernel used as the llvm-mca input for this bottleneck-analysis test
# (x86-64, AT&T operand order: sources first, destination last).
#
# The instruction text and order are matched verbatim by the expected-output
# assertions further down, and the simulated numbers depend on this exact
# sequence, so only comments may be changed here without regenerating them.
#
# Register roles as visible in this loop:
#   %rax        byte offset, advanced by 16 each iteration; the load indexes
#               with %rax*2 while the store indexes with %rax*1
#   %rsi, %rdx  base addresses of the load and the store respectively
#   %ecx        down-counter; the loop repeats until it reaches zero
#   %xmm9-15    only read inside the loop (multiplier operands of the FMAs)
#   -24(%rsp)   only read inside the loop (multiplier operand of vmulps)
#   %xmm1-4     read before being rewritten, so they carry values from the
#               previous iteration into the FMA chain
# NOTE(review): the shape (splat each lane, chain FMAs, feed lanes of the
# result back in) looks like compiler-generated code for a recursive filter;
# the original source is not shown here, so treat that as an assumption.
.LBB0_4:
  vmovups	(%rsi,%rax,2), %xmm0    # load 4 packed floats (unaligned form) from %rsi + 2*%rax
  vpermilps	$255, %xmm0, %xmm7    # imm 0xFF: every lane selects element 3 of the loaded vector
  vmulps	-24(%rsp), %xmm7, %xmm8    # xmm8 = xmm7 * mem[%rsp-24]; starts the accumulation chain
  vpermilps	$170, %xmm0, %xmm6    # imm 0xAA: every lane selects element 2
  vpermilps	$85, %xmm0, %xmm5    # imm 0x55: every lane selects element 1
  vbroadcastss	%xmm0, %xmm0    # every lane = element 0 (overwrites the loaded vector)
  # Serial FMA chain: each instruction consumes the previous one's result as
  # its addend, so these execute back-to-back rather than in parallel.
  vfmadd231ps	%xmm9, %xmm6, %xmm8    # xmm8 = xmm6 * xmm9  + xmm8
  vfmadd213ps	%xmm8, %xmm10, %xmm5    # xmm5 = xmm10 * xmm5 + xmm8
  vfmadd213ps	%xmm5, %xmm11, %xmm0    # xmm0 = xmm11 * xmm0 + xmm5
  vfmadd213ps	%xmm0, %xmm12, %xmm4    # xmm4 = xmm12 * xmm4 + xmm0 (xmm4 from previous iteration)
  vfmadd213ps	%xmm4, %xmm13, %xmm1    # xmm1 = xmm13 * xmm1 + xmm4 (xmm1 from previous iteration)
  vmovaps	%xmm7, %xmm4    # carry this iteration's element-3 splat to the next iteration
  vfmadd213ps	%xmm1, %xmm14, %xmm2    # xmm2 = xmm14 * xmm2 + xmm1 (xmm2 from previous iteration)
  vmovaps	%xmm6, %xmm1    # carry this iteration's element-2 splat to the next iteration
  vfmadd213ps	%xmm2, %xmm15, %xmm3    # xmm3 = xmm15 * xmm3 + xmm2 (xmm3 from previous iteration)
  vpermilps	$170, %xmm3, %xmm0    # splat element 2 of the result; parked in xmm0 until the store has read xmm3
  vmovups	%xmm3, (%rdx,%rax)    # store the 4-float result to %rdx + %rax
  vpermilps	$255, %xmm3, %xmm2    # splat element 3 of the result; carried to the next iteration
  addq	$16, %rax    # advance the byte offset by one 16-byte vector
  decl	%ecx    # count down; sets ZF when the counter hits zero
  vmovaps	%xmm0, %xmm3    # next iteration's xmm3 = element-2 splat of this result (does not touch flags)
  jne	.LBB0_4    # loop while the decremented counter is non-zero

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      2200
# CHECK-NEXT: Total Cycles:      1039
# CHECK-NEXT: Total uOps:        2400

# CHECK:      Dispatch Width:    6
# CHECK-NEXT: uOps Per Cycle:    2.31
# CHECK-NEXT: IPC:               2.12
# CHECK-NEXT: Block RThroughput: 6.0

# CHECK:      Cycles with backend pressure increase [ 92.69% ]
# CHECK-NEXT: Throughput Bottlenecks:
# CHECK-NEXT:   Resource Pressure       [ 46.78% ]
# CHECK-NEXT:   - SKLPort0  [ 14.24% ]
# CHECK-NEXT:   - SKLPort1  [ 14.24% ]
# CHECK-NEXT:   - SKLPort5  [ 46.49% ]
# CHECK-NEXT:   - SKLPort6  [ 8.66% ]
# CHECK-NEXT:   Data Dependencies:      [ 64.97% ]
# CHECK-NEXT:   - Register Dependencies [ 64.97% ]
# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]

# CHECK:      Critical sequence based on the simulation:

# CHECK:                    Instruction                                 Dependency Information
# CHECK-NEXT:  +----< 18.   addq	$16, %rax
# CHECK-NEXT:  |
# CHECK-NEXT:  |    < loop carried >
# CHECK-NEXT:  |
# CHECK-NEXT:  +----> 0.    vmovups	(%rsi,%rax,2), %xmm0              ## REGISTER dependency:  %rax
# CHECK-NEXT:  |      1.    vpermilps	$255, %xmm0, %xmm7
# CHECK-NEXT:  |      2.    vmulps	-24(%rsp), %xmm7, %xmm8
# CHECK-NEXT:  +----> 3.    vpermilps	$170, %xmm0, %xmm6                ## REGISTER dependency:  %xmm0
# CHECK-NEXT:  |      4.    vpermilps	$85, %xmm0, %xmm5
# CHECK-NEXT:  |      5.    vbroadcastss	%xmm0, %xmm0
# CHECK-NEXT:  +----> 6.    vfmadd231ps	%xmm9, %xmm6, %xmm8       ## REGISTER dependency:  %xmm6
# CHECK-NEXT:  +----> 7.    vfmadd213ps	%xmm8, %xmm10, %xmm5      ## REGISTER dependency:  %xmm8
# CHECK-NEXT:  +----> 8.    vfmadd213ps	%xmm5, %xmm11, %xmm0      ## REGISTER dependency:  %xmm5
# CHECK-NEXT:  +----> 9.    vfmadd213ps	%xmm0, %xmm12, %xmm4      ## REGISTER dependency:  %xmm0
# CHECK-NEXT:  +----> 10.   vfmadd213ps	%xmm4, %xmm13, %xmm1      ## REGISTER dependency:  %xmm4
# CHECK-NEXT:  |      11.   vmovaps	%xmm7, %xmm4
# CHECK-NEXT:  +----> 12.   vfmadd213ps	%xmm1, %xmm14, %xmm2      ## REGISTER dependency:  %xmm1
# CHECK-NEXT:  |      13.   vmovaps	%xmm6, %xmm1
# CHECK-NEXT:  +----> 14.   vfmadd213ps	%xmm2, %xmm15, %xmm3      ## REGISTER dependency:  %xmm2
# CHECK-NEXT:  +----> 15.   vpermilps	$170, %xmm3, %xmm0                ## REGISTER dependency:  %xmm3
# CHECK-NEXT:  |      16.   vmovups	%xmm3, (%rdx,%rax)
# CHECK-NEXT:  |      17.   vpermilps	$255, %xmm3, %xmm2
# CHECK-NEXT:  |      18.   addq	$16, %rax
# CHECK-NEXT:  |      19.   decl	%ecx
# CHECK-NEXT:  +----> 20.   vmovaps	%xmm0, %xmm3                      ## REGISTER dependency:  %xmm0
# CHECK-NEXT:         21.   jne	.LBB0_4

# CHECK:      Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
# CHECK-NEXT:  1      6     0.50    *                   vmovups	(%rsi,%rax,2), %xmm0
# CHECK-NEXT:  1      1     1.00                        vpermilps	$255, %xmm0, %xmm7
# CHECK-NEXT:  2      10    0.50    *                   vmulps	-24(%rsp), %xmm7, %xmm8
# CHECK-NEXT:  1      1     1.00                        vpermilps	$170, %xmm0, %xmm6
# CHECK-NEXT:  1      1     1.00                        vpermilps	$85, %xmm0, %xmm5
# CHECK-NEXT:  1      1     1.00                        vbroadcastss	%xmm0, %xmm0
# CHECK-NEXT:  1      4     0.50                        vfmadd231ps	%xmm9, %xmm6, %xmm8
# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm8, %xmm10, %xmm5
# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm5, %xmm11, %xmm0
# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm0, %xmm12, %xmm4
# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm4, %xmm13, %xmm1
# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm7, %xmm4
# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm1, %xmm14, %xmm2
# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm6, %xmm1
# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm2, %xmm15, %xmm3
# CHECK-NEXT:  1      1     1.00                        vpermilps	$170, %xmm3, %xmm0
# CHECK-NEXT:  2      1     1.00           *            vmovups	%xmm3, (%rdx,%rax)
# CHECK-NEXT:  1      1     1.00                        vpermilps	$255, %xmm3, %xmm2
# CHECK-NEXT:  1      1     0.25                        addq	$16, %rax
# CHECK-NEXT:  1      1     0.25                        decl	%ecx
# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm0, %xmm3
# CHECK-NEXT:  1      1     0.50                        jne	.LBB0_4

# CHECK:      Resources:
# CHECK-NEXT: [0]   - SKLDivider
# CHECK-NEXT: [1]   - SKLFPDivider
# CHECK-NEXT: [2]   - SKLPort0
# CHECK-NEXT: [3]   - SKLPort1
# CHECK-NEXT: [4]   - SKLPort2
# CHECK-NEXT: [5]   - SKLPort3
# CHECK-NEXT: [6]   - SKLPort4
# CHECK-NEXT: [7]   - SKLPort5
# CHECK-NEXT: [8]   - SKLPort6
# CHECK-NEXT: [9]   - SKLPort7

# CHECK:      Resource pressure per iteration:
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
# CHECK-NEXT:  -      -     5.52   5.53   1.01   1.03   1.00   6.02   2.93   0.96

# CHECK:      Resource pressure by instruction:
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
# CHECK-NEXT:  -      -      -      -     0.04   0.96    -      -      -      -     vmovups	(%rsi,%rax,2), %xmm0
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$255, %xmm0, %xmm7
# CHECK-NEXT:  -      -     0.03   0.97   0.96   0.04    -      -      -      -     vmulps	-24(%rsp), %xmm7, %xmm8
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$170, %xmm0, %xmm6
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$85, %xmm0, %xmm5
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastss	%xmm0, %xmm0
# CHECK-NEXT:  -      -     0.95   0.05    -      -      -      -      -      -     vfmadd231ps	%xmm9, %xmm6, %xmm8
# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vfmadd213ps	%xmm8, %xmm10, %xmm5
# CHECK-NEXT:  -      -     0.92   0.08    -      -      -      -      -      -     vfmadd213ps	%xmm5, %xmm11, %xmm0
# CHECK-NEXT:  -      -     0.95   0.05    -      -      -      -      -      -     vfmadd213ps	%xmm0, %xmm12, %xmm4
# CHECK-NEXT:  -      -     0.51   0.49    -      -      -      -      -      -     vfmadd213ps	%xmm4, %xmm13, %xmm1
# CHECK-NEXT:  -      -     0.52   0.48    -      -      -      -      -      -     vmovaps	%xmm7, %xmm4
# CHECK-NEXT:  -      -     0.49   0.51    -      -      -      -      -      -     vfmadd213ps	%xmm1, %xmm14, %xmm2
# CHECK-NEXT:  -      -     0.04   0.95    -      -      -     0.01    -      -     vmovaps	%xmm6, %xmm1
# CHECK-NEXT:  -      -     0.51   0.49    -      -      -      -      -      -     vfmadd213ps	%xmm2, %xmm15, %xmm3
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$170, %xmm3, %xmm0
# CHECK-NEXT:  -      -      -      -     0.01   0.03   1.00    -      -     0.96   vmovups	%xmm3, (%rdx,%rax)
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$255, %xmm3, %xmm2
# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     addq	$16, %rax
# CHECK-NEXT:  -      -     0.04   0.01    -      -      -     0.01   0.94    -     decl	%ecx
# CHECK-NEXT:  -      -     0.05   0.95    -      -      -      -      -      -     vmovaps	%xmm0, %xmm3
# CHECK-NEXT:  -      -     0.01    -      -      -      -      -     0.99    -     jne	.LBB0_4