// loop-unroll.mlir: lit/FileCheck regression tests for unrolling scf.for loops.
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2' | FileCheck %s --check-prefix UNROLL-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2

// A single scf.for loop whose lower bound (%arg0), upper bound (%arg1) and
// step (%arg2) are all function arguments, so none of them is a compile-time
// constant. Each iteration stores the constant 7.0 into %arg3 at the
// induction variable.
func @dynamic_loop_unroll(%arg0: index, %arg1: index, %arg2: index,
                          %arg3: memref<?xf32>) {
  %fill = constant 7.0 : f32
  scf.for %iv = %arg0 to %arg1 step %arg2 {
    store %fill, %arg3[%iv] : memref<?xf32>
  }
  return
}
// UNROLL-BY-2-LABEL: func @dynamic_loop_unroll
//  UNROLL-BY-2-SAME:  %[[LB:.*0]]: index,
//  UNROLL-BY-2-SAME:  %[[UB:.*1]]: index,
//  UNROLL-BY-2-SAME:  %[[STEP:.*2]]: index,
//  UNROLL-BY-2-SAME:  %[[MEM:.*3]]: memref<?xf32>
//
//   UNROLL-BY-2-DAG:  %[[V0:.*]] = subi %[[UB]], %[[LB]] : index
//   UNROLL-BY-2-DAG:  %[[C1:.*]] = constant 1 : index
//   UNROLL-BY-2-DAG:  %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index
//   UNROLL-BY-2-DAG:  %[[V2:.*]] = addi %[[V0]], %[[V1]] : index
//       Compute trip count in V3.
//   UNROLL-BY-2-DAG:  %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index
//       Store unroll factor in C2.
//   UNROLL-BY-2-DAG:  %[[C2:.*]] = constant 2 : index
//   UNROLL-BY-2-DAG:  %[[V4:.*]] = remi_signed %[[V3]], %[[C2]] : index
//   UNROLL-BY-2-DAG:  %[[V5:.*]] = subi %[[V3]], %[[V4]] : index
//   UNROLL-BY-2-DAG:  %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index
//       Compute upper bound of unrolled loop in V7.
//   UNROLL-BY-2-DAG:  %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
//       Compute step of unrolled loop in V8.
//   UNROLL-BY-2-DAG:  %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index
//       UNROLL-BY-2:  scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = constant 1 : index
//  UNROLL-BY-2-NEXT:    %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
//  UNROLL-BY-2-NEXT:    %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
//  UNROLL-BY-2-NEXT:  }
//  UNROLL-BY-2-NEXT:  scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-2-NEXT:  }
//  UNROLL-BY-2-NEXT:  return

// UNROLL-BY-3-LABEL: func @dynamic_loop_unroll
//  UNROLL-BY-3-SAME:  %[[LB:.*0]]: index,
//  UNROLL-BY-3-SAME:  %[[UB:.*1]]: index,
//  UNROLL-BY-3-SAME:  %[[STEP:.*2]]: index,
//  UNROLL-BY-3-SAME:  %[[MEM:.*3]]: memref<?xf32>
//
//   UNROLL-BY-3-DAG:  %[[V0:.*]] = subi %[[UB]], %[[LB]] : index
//   UNROLL-BY-3-DAG:  %[[C1:.*]] = constant 1 : index
//   UNROLL-BY-3-DAG:  %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index
//   UNROLL-BY-3-DAG:  %[[V2:.*]] = addi %[[V0]], %[[V1]] : index
//       Compute trip count in V3.
//   UNROLL-BY-3-DAG:  %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index
//       Store unroll factor in C3.
//   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
//   UNROLL-BY-3-DAG:  %[[V4:.*]] = remi_signed %[[V3]], %[[C3]] : index
//   UNROLL-BY-3-DAG:  %[[V5:.*]] = subi %[[V3]], %[[V4]] : index
//   UNROLL-BY-3-DAG:  %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index
//       Compute upper bound of unrolled loop in V7.
//   UNROLL-BY-3-DAG:  %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
//       Compute step of unrolled loop in V8.
//   UNROLL-BY-3-DAG:  %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index
//       UNROLL-BY-3:  scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
//  UNROLL-BY-3-NEXT:    %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
//  UNROLL-BY-3-NEXT:    %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = constant 2 : index
//  UNROLL-BY-3-NEXT:    %[[V11:.*]] = muli %[[STEP]], %[[C2_IV]] : index
//  UNROLL-BY-3-NEXT:    %[[V12:.*]] = addi %[[IV]], %[[V11]] : index
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V12]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:  }
//  UNROLL-BY-3-NEXT:  scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:  }
//  UNROLL-BY-3-NEXT:  return

// Two-deep scf.for nest with fully dynamic bounds and steps: the outer loop
// runs over (%arg0, %arg1, %arg2) and the inner loop over
// (%arg3, %arg4, %arg5). The store is indexed by the inner induction variable
// only. Input for the run that targets loop depth 0 (the outer loop).
func @dynamic_loop_unroll_outer_by_2(
    %arg0: index, %arg1: index, %arg2: index,
    %arg3: index, %arg4: index, %arg5: index,
    %arg6: memref<?xf32>) {
  %fill = constant 7.0 : f32
  scf.for %outer = %arg0 to %arg1 step %arg2 {
    scf.for %inner = %arg3 to %arg4 step %arg5 {
      store %fill, %arg6[%inner] : memref<?xf32>
    }
  }
  return
}
// UNROLL-OUTER-BY-2-LABEL: func @dynamic_loop_unroll_outer_by_2
//  UNROLL-OUTER-BY-2-SAME:  %[[LB0:.*0]]: index,
//  UNROLL-OUTER-BY-2-SAME:  %[[UB0:.*1]]: index,
//  UNROLL-OUTER-BY-2-SAME:  %[[STEP0:.*2]]: index,
//  UNROLL-OUTER-BY-2-SAME:  %[[LB1:.*3]]: index,
//  UNROLL-OUTER-BY-2-SAME:  %[[UB1:.*4]]: index,
//  UNROLL-OUTER-BY-2-SAME:  %[[STEP1:.*5]]: index,
//  UNROLL-OUTER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
//
//       UNROLL-OUTER-BY-2:  scf.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
//  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
//  UNROLL-OUTER-BY-2-NEXT:    }
//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
//  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
//  UNROLL-OUTER-BY-2-NEXT:    }
//  UNROLL-OUTER-BY-2-NEXT:  }
//  UNROLL-OUTER-BY-2-NEXT:  scf.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
//  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
//  UNROLL-OUTER-BY-2-NEXT:    }
//  UNROLL-OUTER-BY-2-NEXT:  }
//  UNROLL-OUTER-BY-2-NEXT:  return

// Same two-deep dynamic scf.for nest as the outer-loop test: the outer loop
// runs over (%arg0, %arg1, %arg2), the inner loop over
// (%arg3, %arg4, %arg5), and the store is indexed by the inner induction
// variable. Input for the run that targets loop depth 1 (the inner loop).
func @dynamic_loop_unroll_inner_by_2(
    %arg0: index, %arg1: index, %arg2: index,
    %arg3: index, %arg4: index, %arg5: index,
    %arg6: memref<?xf32>) {
  %fill = constant 7.0 : f32
  scf.for %outer = %arg0 to %arg1 step %arg2 {
    scf.for %inner = %arg3 to %arg4 step %arg5 {
      store %fill, %arg6[%inner] : memref<?xf32>
    }
  }
  return
}
// UNROLL-INNER-BY-2-LABEL: func @dynamic_loop_unroll_inner_by_2
//  UNROLL-INNER-BY-2-SAME:  %[[LB0:.*0]]: index,
//  UNROLL-INNER-BY-2-SAME:  %[[UB0:.*1]]: index,
//  UNROLL-INNER-BY-2-SAME:  %[[STEP0:.*2]]: index,
//  UNROLL-INNER-BY-2-SAME:  %[[LB1:.*3]]: index,
//  UNROLL-INNER-BY-2-SAME:  %[[UB1:.*4]]: index,
//  UNROLL-INNER-BY-2-SAME:  %[[STEP1:.*5]]: index,
//  UNROLL-INNER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
//
//       UNROLL-INNER-BY-2:  scf.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
//       UNROLL-INNER-BY-2:    scf.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
//  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
//  UNROLL-INNER-BY-2-NEXT:      %[[C1_IV:.*]] = constant 1 : index
//  UNROLL-INNER-BY-2-NEXT:      %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index
//  UNROLL-INNER-BY-2-NEXT:      %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index
//  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
//  UNROLL-INNER-BY-2-NEXT:    }
//  UNROLL-INNER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
//  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
//  UNROLL-INNER-BY-2-NEXT:    }
//  UNROLL-INNER-BY-2-NEXT:  }
//  UNROLL-INNER-BY-2-NEXT:  return

// Constant-bound loop from 0 to 20 with step 1, i.e. 20 iterations. Since 20
// is a multiple of the unroll factor 2, unrolling by 2 must not generate an
// epilogue clean-up loop.
func @static_loop_unroll_by_2(%arg0: memref<?xf32>) {
  %fill = constant 7.0 : f32
  %c0 = constant 0 : index
  %c20 = constant 20 : index
  %c1 = constant 1 : index
  scf.for %iv = %c0 to %c20 step %c1 {
    store %fill, %arg0[%iv] : memref<?xf32>
  }
  return
}
// UNROLL-BY-2-LABEL: func @static_loop_unroll_by_2
//  UNROLL-BY-2-SAME:  %[[MEM:.*0]]: memref<?xf32>
//
//   UNROLL-BY-2-DAG:  %[[C0:.*]] = constant 0 : index
//   UNROLL-BY-2-DAG:  %[[C1:.*]] = constant 1 : index
//   UNROLL-BY-2-DAG:  %[[C20:.*]] = constant 20 : index
//   UNROLL-BY-2-DAG:  %[[C2:.*]] = constant 2 : index
//       UNROLL-BY-2:  scf.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = constant 1 : index
//  UNROLL-BY-2-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
//  UNROLL-BY-2-NEXT:    %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
//  UNROLL-BY-2-NEXT:  }
//  UNROLL-BY-2-NEXT:  return

// Constant-bound loop from 0 to 20 with step 1, i.e. 20 iterations. Since 20
// is not a multiple of the unroll factor 3, unrolling by 3 must generate an
// epilogue clean-up loop for the leftover iterations.
func @static_loop_unroll_by_3(%arg0: memref<?xf32>) {
  %fill = constant 7.0 : f32
  %c0 = constant 0 : index
  %c20 = constant 20 : index
  %c1 = constant 1 : index
  scf.for %iv = %c0 to %c20 step %c1 {
    store %fill, %arg0[%iv] : memref<?xf32>
  }
  return
}

// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3
//  UNROLL-BY-3-SAME:  %[[MEM:.*0]]: memref<?xf32>
//
//   UNROLL-BY-3-DAG:  %[[C0:.*]] = constant 0 : index
//   UNROLL-BY-3-DAG:  %[[C1:.*]] = constant 1 : index
//   UNROLL-BY-3-DAG:  %[[C20:.*]] = constant 20 : index
//   UNROLL-BY-3-DAG:  %[[C18:.*]] = constant 18 : index
//   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
//       UNROLL-BY-3:  scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
//  UNROLL-BY-3-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
//  UNROLL-BY-3-NEXT:    %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = constant 2 : index
//  UNROLL-BY-3-NEXT:    %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index
//  UNROLL-BY-3-NEXT:    %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:  }
//  UNROLL-BY-3-NEXT:  scf.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:  }
//  UNROLL-BY-3-NEXT:  return

// Constant-bound loop from 0 to 10 with step 1, i.e. 10 iterations. Unrolling
// by 3 leaves exactly one leftover iteration; test that the body of that
// single-iteration epilogue loop is promoted into the loop's containing block
// instead of being kept as a loop.
func @static_loop_unroll_by_3_promote_epilogue(%arg0: memref<?xf32>) {
  %fill = constant 7.0 : f32
  %c0 = constant 0 : index
  %c10 = constant 10 : index
  %c1 = constant 1 : index
  scf.for %iv = %c0 to %c10 step %c1 {
    store %fill, %arg0[%iv] : memref<?xf32>
  }
  return
}
// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_promote_epilogue
//  UNROLL-BY-3-SAME:  %[[MEM:.*0]]: memref<?xf32>
//
//   UNROLL-BY-3-DAG:  %[[C0:.*]] = constant 0 : index
//   UNROLL-BY-3-DAG:  %[[C1:.*]] = constant 1 : index
//   UNROLL-BY-3-DAG:  %[[C10:.*]] = constant 10 : index
//   UNROLL-BY-3-DAG:  %[[C9:.*]] = constant 9 : index
//   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
//       UNROLL-BY-3:  scf.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
//  UNROLL-BY-3-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
//  UNROLL-BY-3-NEXT:    %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = constant 2 : index
//  UNROLL-BY-3-NEXT:    %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index
//  UNROLL-BY-3-NEXT:    %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:  }
//  UNROLL-BY-3-NEXT:  store %{{.*}}, %[[MEM]][%[[C9]]] : memref<?xf32>
//  UNROLL-BY-3-NEXT:  return