| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s |
| |
| # Test that we fold the correct element from G_UNMERGE_VALUES into the fma. |
| |
| # Input pattern: fadd(fmul(x, y), el1), where el1 is the second result of the |
| # G_UNMERGE_VALUES of a loaded <2 x s32> vector. |
| # Expected output: one G_FMA x, y, %el1 replacing the G_FMUL/G_FADD pair. |
| # This function does not set a machineFunctionInfo mode. |
| --- |
| name: test_f32_add_mul |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| |
| ; GFX10-LABEL: name: test_f32_add_mul |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1 |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s32) = COPY $vgpr1 |
| %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %6:_(s32) = G_FMUL %0, %1 |
| %7:_(s32) = G_FADD %6, %el1 |
| $vgpr0 = COPY %7(s32) |
| ... |
| |
| # Input pattern: fadd(el1, fmul(x, y)), i.e. the multiply is the second |
| # G_FADD operand and el1 (second G_UNMERGE_VALUES result) is the first. |
| # Expected output: G_FMA x, y, %el1. |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f32_add_mul_rhs |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| |
| ; GFX10-LABEL: name: test_f32_add_mul_rhs |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1 |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s32) = COPY $vgpr1 |
| %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %6:_(s32) = G_FMUL %0, %1 |
| %7:_(s32) = G_FADD %el1, %6 |
| $vgpr0 = COPY %7(s32) |
| ... |
| |
| # Input pattern: fadd(fpext(fmul(a, b)), el1), where a and b are s16 values |
| # truncated from s32 copies of $sgpr0/$sgpr1, the G_FMUL and G_FADD carry |
| # fast-math flags, and el1 is the second G_UNMERGE_VALUES result. |
| # Expected output: a and b are each extended with their own G_FPEXT and then |
| # used in G_FMA fpext(a), fpext(b), %el1. |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f16_f32_add_ext_mul |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 |
| |
| ; GFX10-LABEL: name: test_f16_f32_add_ext_mul |
| ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 |
| ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 |
| ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) |
| ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) |
| %0:_(s32) = COPY $sgpr0 |
| %1:_(s16) = G_TRUNC %0(s32) |
| %2:_(s32) = COPY $sgpr1 |
| %3:_(s16) = G_TRUNC %2(s32) |
| %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3 |
| %9:_(s32) = G_FPEXT %8(s16) |
| %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1 |
| $vgpr0 = COPY %10(s32) |
| ... |
| |
| # Input pattern: fadd(el1, fpext(fmul(a, b))), i.e. the extended s16 multiply |
| # is the second G_FADD operand and el1 (second G_UNMERGE_VALUES result) is |
| # the first. The G_FMUL and G_FADD carry fast-math flags. |
| # Expected output: G_FMA fpext(a), fpext(b), %el1. |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f16_f32_add_ext_mul_rhs |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 |
| |
| ; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs |
| ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 |
| ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 |
| ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) |
| ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) |
| %0:_(s32) = COPY $sgpr0 |
| %1:_(s16) = G_TRUNC %0(s32) |
| %2:_(s32) = COPY $sgpr1 |
| %3:_(s16) = G_TRUNC %2(s32) |
| %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3 |
| %9:_(s32) = G_FPEXT %8(s16) |
| %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9 |
| $vgpr0 = COPY %10(s32) |
| ... |
| |
| # Input pattern: fadd(fma(x, y, fmul(u, v)), el1), all s32 with fast-math |
| # flags; el1 is the second G_UNMERGE_VALUES result. |
| # Expected output: two nested FMAs, G_FMA x, y, (G_FMA u, v, %el1), so the |
| # unmerged element becomes the addend of the inner FMA. |
| # This function does not set a machineFunctionInfo mode. |
| --- |
| name: test_f32_add_fma_mul |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5 |
| |
| ; GFX10-LABEL: name: test_f32_add_fma_mul |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1 |
| ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s32) = COPY $vgpr1 |
| %2:_(s32) = COPY $vgpr2 |
| %3:_(s32) = COPY $vgpr3 |
| %ptr:_(p1) = COPY $vgpr4_vgpr5 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3 |
| %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8 |
| %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1 |
| $vgpr0 = COPY %10(s32) |
| ... |
| |
| # Input pattern: fadd(el1, fma(x, y, fmul(u, v))), i.e. the FMA chain is the |
| # second G_FADD operand and el1 (second G_UNMERGE_VALUES result) is the first. |
| # All floating-point instructions carry fast-math flags. |
| # Expected output: G_FMA x, y, (G_FMA u, v, %el1). |
| # This function does not set a machineFunctionInfo mode. |
| --- |
| name: test_f32_add_fma_mul_rhs |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5 |
| |
| ; GFX10-LABEL: name: test_f32_add_fma_mul_rhs |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1 |
| ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s32) = COPY $vgpr1 |
| %2:_(s32) = COPY $vgpr2 |
| %3:_(s32) = COPY $vgpr3 |
| %ptr:_(p1) = COPY $vgpr4_vgpr5 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3 |
| %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8 |
| %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9 |
| $vgpr0 = COPY %10(s32) |
| ... |
| |
| # Input pattern: fadd(fma(x, y, fpext(fmul(u, v))), el1), where x and y are |
| # s32, u and v are s16 values truncated from $vgpr4/$vgpr5, and el1 is the |
| # second G_UNMERGE_VALUES result. No fast-math flags are set on the input. |
| # Expected output: G_FMA x, y, (G_FMA fpext(u), fpext(v), %el1). |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f16_f32_add_fma_ext_mul |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 |
| |
| ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 |
| ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 |
| ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) |
| ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) |
| ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 |
| ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s32) = COPY $vgpr1 |
| %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %6:_(s32) = COPY $vgpr4 |
| %7:_(s16) = G_TRUNC %6(s32) |
| %8:_(s32) = COPY $vgpr5 |
| %9:_(s16) = G_TRUNC %8(s32) |
| %10:_(s16) = G_FMUL %7, %9 |
| %11:_(s32) = G_FPEXT %10(s16) |
| %12:_(s32) = G_FMA %0, %1, %11 |
| %13:_(s32) = G_FADD %12, %el1 |
| $vgpr0 = COPY %13(s32) |
| ... |
| |
| # Input pattern: fadd(fpext(fadd(fmul(a, b), fmul(u, v))), el1), where a, b, |
| # u and v are all s16 and el1 is the second G_UNMERGE_VALUES result. The |
| # input contains no explicit G_FMA; the s16 G_FADD of two G_FMULs is the |
| # value that gets extended. |
| # NOTE(review): presumably the s16 fadd/fmul pair is first combined into an |
| # s16 FMA, which the "ext_fma_mul" name refers to - confirm against the |
| # combiner. |
| # Expected output: all four s16 operands are extended individually, then |
| # G_FMA fpext(a), fpext(b), (G_FMA fpext(u), fpext(v), %el1). |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f16_f32_add_ext_fma_mul |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 |
| |
| ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 |
| ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 |
| ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) |
| ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) |
| ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) |
| ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) |
| ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1 |
| ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]] |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s16) = G_TRUNC %0(s32) |
| %2:_(s32) = COPY $vgpr1 |
| %3:_(s16) = G_TRUNC %2(s32) |
| %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %8:_(s32) = COPY $vgpr4 |
| %9:_(s16) = G_TRUNC %8(s32) |
| %10:_(s32) = COPY $vgpr5 |
| %11:_(s16) = G_TRUNC %10(s32) |
| %12:_(s16) = G_FMUL %9, %11 |
| %13:_(s16) = G_FMUL %1, %3 |
| %14:_(s16) = G_FADD %13, %12 |
| %15:_(s32) = G_FPEXT %14(s16) |
| %16:_(s32) = G_FADD %15, %el1 |
| $vgpr0 = COPY %16(s32) |
| ... |
| |
| # Input pattern: fadd(el1, fma(x, y, fpext(fmul(u, v)))), i.e. the FMA chain |
| # is the second G_FADD operand. Here the pointer, load and G_UNMERGE_VALUES |
| # come first (from $vgpr0_vgpr1); x and y are s32 copies of $vgpr2/$vgpr3 and |
| # u and v are s16 values truncated from $vgpr4/$vgpr5. |
| # Expected output: G_FMA x, y, (G_FMA fpext(u), fpext(v), %el1). |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f16_f32_add_fma_ext_mul_rhs |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 |
| |
| ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul_rhs |
| ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 |
| ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 |
| ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) |
| ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) |
| ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1 |
| ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]] |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) |
| %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %4:_(s32) = COPY $vgpr2 |
| %5:_(s32) = COPY $vgpr3 |
| %6:_(s32) = COPY $vgpr4 |
| %7:_(s16) = G_TRUNC %6(s32) |
| %8:_(s32) = COPY $vgpr5 |
| %9:_(s16) = G_TRUNC %8(s32) |
| %10:_(s16) = G_FMUL %7, %9 |
| %11:_(s32) = G_FPEXT %10(s16) |
| %12:_(s32) = G_FMA %4, %5, %11 |
| %13:_(s32) = G_FADD %el1, %12 |
| $vgpr0 = COPY %13(s32) |
| ... |
| |
| # Input pattern: fadd(el1, fpext(fadd(fmul(a, b), fmul(u, v)))), i.e. the |
| # extended s16 expression is the second G_FADD operand and el1 (second |
| # G_UNMERGE_VALUES result) is the first. a, b, u and v are s16 values |
| # truncated from $vgpr2..$vgpr5; the input contains no explicit G_FMA. |
| # NOTE(review): presumably the s16 fadd/fmul pair is first combined into an |
| # s16 FMA, which the "ext_fma_mul" name refers to - confirm against the |
| # combiner. |
| # Expected output: G_FMA fpext(a), fpext(b), (G_FMA fpext(u), fpext(v), %el1). |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f16_f32_add_ext_fma_mul_rhs |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 |
| |
| ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul_rhs |
| ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2 |
| ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 |
| ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 |
| ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 |
| ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) |
| ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) |
| ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) |
| ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) |
| ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1 |
| ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]] |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32) |
| %ptr:_(p1) = COPY $vgpr0_vgpr1 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %4:_(s32) = COPY $vgpr2 |
| %5:_(s16) = G_TRUNC %4(s32) |
| %6:_(s32) = COPY $vgpr3 |
| %7:_(s16) = G_TRUNC %6(s32) |
| %8:_(s32) = COPY $vgpr4 |
| %9:_(s16) = G_TRUNC %8(s32) |
| %10:_(s32) = COPY $vgpr5 |
| %11:_(s16) = G_TRUNC %10(s32) |
| %12:_(s16) = G_FMUL %9, %11 |
| %13:_(s16) = G_FMUL %5, %7 |
| %14:_(s16) = G_FADD %13, %12 |
| %15:_(s32) = G_FPEXT %14(s16) |
| %16:_(s32) = G_FADD %el1, %15 |
| $vgpr0 = COPY %16(s32) |
| ... |
| |
| # Input pattern: fsub(fmul(x, y), el1), where el1 is the second result of the |
| # G_UNMERGE_VALUES of a loaded <2 x s32> vector. |
| # Expected output: G_FMA x, y, (G_FNEG %el1), so the unmerged element is |
| # negated and used as the FMA addend. |
| # The pointer is read from $vgpr2_vgpr3 so that it matches the declared |
| # liveins and does not overlap the registers holding x and y. |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f32_sub_mul |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| |
| ; GFX10-LABEL: name: test_f32_sub_mul |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1 |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]] |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s32) = COPY $vgpr1 |
| %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %6:_(s32) = G_FMUL %0, %1 |
| %7:_(s32) = G_FSUB %6, %el1 |
| $vgpr0 = COPY %7(s32) |
| ... |
| |
| # Input pattern: fsub(el1, fmul(x, y)), i.e. the multiply is the subtrahend |
| # and el1 (second G_UNMERGE_VALUES result) is the minuend. |
| # Expected output: G_FMA (G_FNEG x), y, %el1, so the negation is applied to |
| # the first multiplicand and the unmerged element is the FMA addend. |
| # The function mode sets fp32 input and output denormals to false. |
| --- |
| name: test_f32_sub_mul_rhs |
| machineFunctionInfo: |
| mode: |
| fp32-input-denormals: false |
| fp32-output-denormals: false |
| body: | |
| bb.1: |
| liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| |
| ; GFX10-LABEL: name: test_f32_sub_mul_rhs |
| ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 |
| ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] |
| ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1 |
| ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) |
| %0:_(s32) = COPY $vgpr0 |
| %1:_(s32) = COPY $vgpr1 |
| %ptr:_(p1) = COPY $vgpr2_vgpr3 |
| %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) |
| %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) |
| %6:_(s32) = G_FMUL %0, %1 |
| %7:_(s32) = G_FSUB %el1, %6 |
| $vgpr0 = COPY %7(s32) |
| ... |