| # RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s |
| # Loads through flat instructions are not uniform |
| --- |
| # Two FLAT_LOAD_DWORD instructions read through the same address register %0; |
| # the only visible difference is the address space on the memory operand. |
| name: flatloads |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: flatloads |
| ; CHECK: DIVERGENT: %1 |
| ; CHECK-NOT: DIVERGENT: %2 |
| ; %0 is an undefined 64-bit VGPR pair that serves as the address of both loads. |
| %0:vreg_64 = IMPLICIT_DEF |
| ; Memory operand carries no address space: the test expects %1 to be divergent. |
| %1:vgpr_32(s32) = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) |
| ; Same opcode and address, but the memory operand is tagged addrspace 1: the |
| ; negative directive above requires that %2 is not printed as divergent. |
| %2:vgpr_32(s32) = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) |
| ; Consumes %1; no directive in this function refers to %3. |
| %3:sreg_32 = V_READFIRSTLANE_B32 %1(s32), implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| # Loads from scratch are not uniform |
| --- |
| # A single SCRATCH_LOAD_DWORD whose result is expected to be reported divergent. |
| name: scratchloads |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: scratchloads |
| ; CHECK: DIVERGENT: %1 |
| ; The address register %0 is set to the constant 0. |
| %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| ; The memory operand is tagged addrspace 5; %1 is the value matched above. |
| %1:vgpr_32 = SCRATCH_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) |
| S_ENDPGM 0 |
| ... |
| |
| # Global load |
| --- |
| # Two global loads: one with an untagged memory operand, one tagged addrspace 1. |
| name: globalloads |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: globalloads |
| ; CHECK: DIVERGENT: %2 |
| ; CHECK-NOT: DIVERGENT: %3 |
| ; %0 and %1 are undefined 64-bit VGPR pairs used as the two load addresses. |
| %0:vreg_64 = IMPLICIT_DEF |
| %1:vreg_64 = IMPLICIT_DEF |
| ; No address space on the memory operand, and an extra implicit $flat_scr use: |
| ; the test expects %2 to be divergent. |
| %2:vgpr_32(s32) = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) |
| ; Memory operand tagged addrspace 1: the negative directive above requires that |
| ; %3 is not printed as divergent. |
| %3:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec :: (load (s64), addrspace 1) |
| ; Consumes %2; no directive in this function refers to %4. |
| %4:sreg_32 = V_READFIRSTLANE_B32 %2(s32), implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| # FIXME: The ADDTID instruction might be incorrectly marked uniform |
| --- |
| # Records the currently expected output for DS_READ_ADDTID_B32: the pass is |
| # expected to print that all values are uniform. |
| name: dsreads |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: dsreads |
| ; CHECK-NEXT: ALL VALUES UNIFORM |
| ; The "all uniform" line must directly follow the function header line. |
| ; %0 is defined here but not referenced by any later instruction in this function. |
| %0:vreg_64 = IMPLICIT_DEF |
| ; $m0 is an implicit operand of the DS read below, so it is initialised first. |
| $m0 = S_MOV_B32 0 |
| %1:vgpr_32 = DS_READ_ADDTID_B32 0, 0, implicit $m0, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| # copy source == $sgpr => uniform, $vgpr => divergent |
| --- |
| # Three COPYs from physical registers; only the copy from a VGPR is matched as |
| # divergent. |
| name: sgprcopy |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: sgprcopy |
| ; CHECK: DIVERGENT: %2 |
| ; The only directive is a positive match on %2; nothing here asserts on %0 or %1. |
| liveins: $sgpr0,$sgpr1,$vgpr0 |
| ; SGPR source, SGPR destination. |
| %0:sgpr_32 = COPY $sgpr0 |
| ; SGPR source, VGPR destination. |
| %1:vgpr_32 = COPY $sgpr1 |
| ; VGPR source, VGPR destination: this is the value expected to be divergent. |
| %2:vgpr_32 = COPY $vgpr0 |
| S_ENDPGM 0 |
| ... |
| |
| # writelane is not uniform |
| --- |
| # Mixes readfirstlane/readlane (SGPR results) with a writelane (VGPR result) and |
| # a vector compare; only the writelane result %4 is matched as divergent. |
| # NOTE(review): unlike most functions in this file, tracksRegLiveness is not set |
| # here, and $vgpr0, $vgpr1 and $vcc are read without a visible definition. |
| name: writelane |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: writelane |
| ; CHECK: DIVERGENT: %4 |
| |
| ; Note how %5 is the result of a vector compare, but it is reported as |
| ; uniform because it is stored in an sreg. |
| ; CHECK-NOT: DIVERGENT: %5 |
| |
| ; Undefined VGPR inputs for the lane operations below. |
| %0:vgpr_32 = IMPLICIT_DEF |
| %1:vgpr_32 = IMPLICIT_DEF |
| ; Lane reads into virtual SGPRs; no directive in this function refers to %2 or %3. |
| %2:sgpr_32 = V_READFIRSTLANE_B32 %0, implicit $exec |
| %3:sgpr_32 = V_READLANE_B32 %1, 0, implicit $exec |
| ; The same two opcodes on physical registers; the second uses $sgpr0 as the |
| ; lane select. |
| $sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec |
| $sgpr1 = V_READLANE_B32 $vgpr1, $sgpr0, implicit $exec |
| |
| ; %4 is the value the positive directive expects to be divergent. |
| %4:vgpr_32 = V_WRITELANE_B32 0, 0, %0, implicit $exec |
| ; Compares %0 with the divergent %4, yet the result lives in an sreg_64. |
| %5:sreg_64 = V_CMP_EQ_U32_e64 %0, %4, implicit $exec |
| ; The branch reads $vcc, not %5. |
| S_CBRANCH_VCCZ %bb.1, implicit $vcc |
| |
| bb.1: |
| ; No directive in this function refers to %16. |
| %16:vgpr_32 = IMPLICIT_DEF |
| S_ENDPGM 0 |
| ... |
| # Directly reading a physical VGPR is not uniform |
| --- |
| # Every virtual register defined here has at least one physical VGPR operand, |
| # and every one of them is expected to be reported divergent. |
| name: physicalreg |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: physicalreg |
| ; CHECK: DIVERGENT: %0 |
| ; CHECK: DIVERGENT: %1 |
| ; CHECK: DIVERGENT: %2 |
| ; CHECK: DIVERGENT: %3 |
| ; CHECK: DIVERGENT: %4 |
| ; CHECK-NOT: DIVERGENT |
| ; CHECK: DIVERGENT: %5 |
| ; The bare negative directive sits between the %4 and %5 matches, so no other |
| ; divergence line may be printed between those two. |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 |
| ; Plain copies out of physical VGPRs. |
| %0:vgpr_32 = COPY $vgpr0 |
| %1:vgpr_32 = COPY $vgpr1 |
| ; One virtual operand (%1) and one physical operand ($vgpr3). |
| %2:vgpr_32 = V_AND_B32_e32 %1, $vgpr3, implicit $exec |
| ; Both operands are physical VGPRs. |
| %3:vgpr_32 = V_ADD_U32_e32 $vgpr2, $vgpr3, implicit $exec |
| ; Same physical operands; this opcode also implicitly defines $vcc. |
| %4:vgpr_32 = V_SUB_CO_U32_e32 $vgpr2, $vgpr3, implicit $exec, implicit-def $vcc |
| ; Reads the two remaining live-in registers, $vgpr4 and $vgpr5. |
| %5:vgpr_32 = V_AND_B32_e32 $vgpr4, $vgpr5, implicit $exec |
| S_ENDPGM 0 |
| ... |
| # mbcnt instructions are not uniform |
| --- |
| # A single V_MBCNT_LO_U32_B32_e64 whose result is expected to be reported |
| # divergent. |
| name: mbcnt_lo |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: mbcnt_lo |
| ; CHECK: DIVERGENT: %0 |
| ; Both source operands are immediates (-1 and 0), so no register input feeds %0. |
| %0:vgpr_32 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec |
| S_ENDPGM 0 |
| ... |
| --- |
| # A single V_MBCNT_HI_U32_B32_e64 whose result is expected to be reported |
| # divergent. |
| name: mbcnt_hi |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| ; CHECK-LABEL: MachineUniformityInfo for function: mbcnt_hi |
| ; CHECK: DIVERGENT: %0 |
| ; Both source operands are immediates (-1 and 0), so no register input feeds %0. |
| %0:vgpr_32 = V_MBCNT_HI_U32_B32_e64 -1, 0, implicit $exec |
| S_ENDPGM 0 |
| ... |