//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
NVPTXTargetCodeGenInfo &CGInfo;
public:
NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
: ABIInfo(CGT), CGInfo(Info) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool isUnsupportedType(QualType T) const;
ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
bool shouldEmitStaticExternCAliases() const override;
  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }
private:
  // Builds an MDNode with GV, Name, and Operand as operands, and appends it
  // to the module's "nvvm.annotations" named metadata node.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
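  // Copies a surface or texture object handle from Src to Dst. If the source
  // resolves to a global variable, the handle is materialized via the
  // `nvvm.texsurf.handle.internal` intrinsic; otherwise the handle value is
  // loaded as a plain scalar.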
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) {
llvm::Value *Handle = nullptr;
llvm::Constant *C =
llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
    // Look through an `addrspacecast` on the constant pointer, if any.
if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
Handle = CGF.EmitRuntimeCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
{GV->getType()}),
{GV}, "texsurf_handle");
} else
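      // The handle is not statically known; copy the already-materialized
      // handle value.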
Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
CGF.EmitStoreOfScalar(Handle, Dst);
}
};

/// Checks whether the given type is not directly supported by the target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
ASTContext &Context = getContext();
if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
return true;
if (!Context.getTargetInfo().hasFloat128Type() &&
(T->isFloat128Type() ||
(T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
return true;
if (const auto *EIT = T->getAs<BitIntType>())
return EIT->getNumBits() >
(Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
Context.getTypeSize(T) > 64U)
return true;
if (const auto *AT = T->getAsArrayTypeUnsafe())
return isUnsupportedType(AT->getElementType());
const auto *RT = T->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const CXXBaseSpecifier &I : CXXRD->bases())
if (isUnsupportedType(I.getType()))
return true;
for (const FieldDecl *I : RD->fields())
if (isUnsupportedType(I->getType()))
return true;
return false;
}

/// Coerces the given type into an array whose integer elements are at most
/// MaxSize bits wide.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
unsigned MaxSize) const {
// Alignment and Size are measured in bits.
const uint64_t Size = getContext().getTypeSize(Ty);
const uint64_t Alignment = getContext().getTypeAlign(Ty);
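  // Use the type's alignment, capped at MaxSize, as the element width so that
  // each array element remains naturally aligned.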
const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
const uint64_t NumElements = (Size + Div - 1) / Div;
return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
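  // Under OpenMP target device compilation, return types the device cannot
  // represent natively are coerced into an array of integers of at most 64
  // bits each.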
if (getContext().getLangOpts().OpenMP &&
getContext().getLangOpts().OpenMPIsTargetDevice &&
isUnsupportedType(RetTy))
return coerceToIntArrayWithLimit(RetTy, 64);
  // Note: unlike the default ABI, non-scalar types (e.g. aggregates) are
  // returned directly rather than indirectly.
if (!RetTy->isScalarType())
return ABIArgInfo::getDirect();
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
  // Return aggregate types as indirect, passed by value.
if (isAggregateTypeForABI(Ty)) {
// Under CUDA device compilation, tex/surf builtin types are replaced with
// object types and passed directly.
if (getContext().getLangOpts().CUDAIsDevice) {
if (Ty->isCUDADeviceBuiltinSurfaceType())
return ABIArgInfo::getDirect(
CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
if (Ty->isCUDADeviceBuiltinTextureType())
return ABIArgInfo::getDirect(
CGInfo.getCUDADeviceBuiltinTextureDeviceType());
}
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
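  // _BitInt types wider than the largest natively supported integer are
  // likewise passed indirectly by value.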
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if ((EIT->getNumBits() > 128) ||
(!getContext().getTargetInfo().hasInt128Type() &&
EIT->getNumBits() > 64))
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
// Always honor user-specified calling convention.
if (FI.getCallingConvention() != llvm::CallingConv::C)
return;
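  // Otherwise, fall back to the runtime calling convention.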
FI.setEffectiveCallingConvention(getRuntimeCC());
}

Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
llvm_unreachable("NVPTX does not support varargs");
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
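  // Only definitions are annotated; there is nothing to do for declarations.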
if (GV->isDeclaration())
return;
const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
if (VD) {
if (M.getLangOpts().CUDA) {
if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
addNVVMMetadata(GV, "surface", 1);
else if (VD->getType()->isCUDADeviceBuiltinTextureType())
addNVVMMetadata(GV, "texture", 1);
return;
}
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *F = cast<llvm::Function>(GV);
// Perform special handling in OpenCL mode
if (M.getLangOpts().OpenCL) {
// Use OpenCL function attributes to check for kernel functions
// By default, all functions are device functions
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL __kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
  }

  // Perform special handling in CUDA mode.
if (M.getLangOpts().CUDA) {
// CUDA __global__ functions get a kernel metadata entry. Since
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
llvm::APSInt MaxThreads(32);
MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
if (MaxThreads > 0)
addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
// not specified in __launch_bounds__ or if the user specified a 0 value,
// we don't have to add a PTX directive.
if (Attr->getMinBlocks()) {
llvm::APSInt MinBlocks(32);
MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
if (MinBlocks > 0)
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
}
}
  }

  // Attach kernel metadata directly if compiling for NVPTX.
if (FD->hasAttr<NVPTXKernelAttr>()) {
addNVVMMetadata(F, "kernel", 1);
}
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
StringRef Name, int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
llvm::Metadata *MDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
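  // Aliases are generally not supported on NVPTX, so do not emit the usual
  // aliases for static extern "C" functions.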
return false;
}

} // namespace

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}