| //===- NVPTX.cpp ----------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "ABIInfoImpl.h" |
| #include "TargetInfo.h" |
| #include "llvm/IR/IntrinsicsNVPTX.h" |
| |
| using namespace clang; |
| using namespace clang::CodeGen; |
| |
| //===----------------------------------------------------------------------===// |
| // NVPTX ABI Implementation |
| //===----------------------------------------------------------------------===// |
| |
| namespace { |
| |
| class NVPTXTargetCodeGenInfo; |
| |
| class NVPTXABIInfo : public ABIInfo { |
| NVPTXTargetCodeGenInfo &CGInfo; |
| |
| public: |
| NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) |
| : ABIInfo(CGT), CGInfo(Info) {} |
| |
| ABIArgInfo classifyReturnType(QualType RetTy) const; |
| ABIArgInfo classifyArgumentType(QualType Ty) const; |
| |
| void computeInfo(CGFunctionInfo &FI) const override; |
| Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, |
| QualType Ty) const override; |
| bool isUnsupportedType(QualType T) const; |
| ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const; |
| }; |
| |
| class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { |
| public: |
| NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) |
| : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {} |
| |
| void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, |
| CodeGen::CodeGenModule &M) const override; |
| bool shouldEmitStaticExternCAliases() const override; |
| |
| llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { |
    // On the device side, a surface reference is represented as an object
    // handle carried in a 64-bit integer.
| return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); |
| } |
| |
| llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { |
    // On the device side, a texture reference is represented as an object
    // handle carried in a 64-bit integer.
| return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); |
| } |
| |
| bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, |
| LValue Src) const override { |
| emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); |
| return true; |
| } |
| |
| bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, |
| LValue Src) const override { |
| emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); |
| return true; |
| } |
| |
| private: |
  // Creates an MDNode with GV, Name, and Operand as operands, and appends it
  // to the module-level `nvvm.annotations` named metadata node.
| static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, |
| int Operand); |
| |
| static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, |
| LValue Src) { |
| llvm::Value *Handle = nullptr; |
| llvm::Constant *C = |
| llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer()); |
    // Look through an `addrspacecast` on the constant pointer, if any.
| if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C)) |
| C = llvm::cast<llvm::Constant>(ASC->getPointerOperand()); |
| if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) { |
      // Load the handle from the underlying global variable using the
      // `llvm.nvvm.texsurf.handle.internal` intrinsic.
| Handle = CGF.EmitRuntimeCall( |
| CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, |
| {GV->getType()}), |
| {GV}, "texsurf_handle"); |
| } else |
| Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); |
| CGF.EmitStoreOfScalar(Handle, Dst); |
| } |
| }; |
| |
/// Checks whether the given type is unsupported directly by the current
/// target, recursing through array elements, base classes, and record fields.
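/// For example, `__float128` is unsupported when the target lacks a 128-bit
/// float type, and a record type is unsupported if any of its bases or
/// fields is.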
| bool NVPTXABIInfo::isUnsupportedType(QualType T) const { |
| ASTContext &Context = getContext(); |
| if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type()) |
| return true; |
| if (!Context.getTargetInfo().hasFloat128Type() && |
| (T->isFloat128Type() || |
| (T->isRealFloatingType() && Context.getTypeSize(T) == 128))) |
| return true; |
| if (const auto *EIT = T->getAs<BitIntType>()) |
| return EIT->getNumBits() > |
| (Context.getTargetInfo().hasInt128Type() ? 128U : 64U); |
| if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() && |
| Context.getTypeSize(T) > 64U) |
| return true; |
| if (const auto *AT = T->getAsArrayTypeUnsafe()) |
| return isUnsupportedType(AT->getElementType()); |
| const auto *RT = T->getAs<RecordType>(); |
| if (!RT) |
| return false; |
| const RecordDecl *RD = RT->getDecl(); |
| |
| // If this is a C++ record, check the bases first. |
| if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) |
| for (const CXXBaseSpecifier &I : CXXRD->bases()) |
| if (isUnsupportedType(I.getType())) |
| return true; |
| |
| for (const FieldDecl *I : RD->fields()) |
| if (isUnsupportedType(I->getType())) |
| return true; |
| return false; |
| } |
| |
/// Coerce the given type into an integer array whose element width does not
/// exceed MaxSize bits.
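/// For example, with MaxSize = 64, a 128-bit type with 64-bit alignment
/// yields Div = min(64, 64) = 64 and NumElements = 2, i.e. `[2 x i64]`.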
| ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty, |
| unsigned MaxSize) const { |
| // Alignment and Size are measured in bits. |
| const uint64_t Size = getContext().getTypeSize(Ty); |
| const uint64_t Alignment = getContext().getTypeAlign(Ty); |
| const unsigned Div = std::min<unsigned>(MaxSize, Alignment); |
| llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div); |
| const uint64_t NumElements = (Size + Div - 1) / Div; |
| return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); |
| } |
| |
| ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { |
| if (RetTy->isVoidType()) |
| return ABIArgInfo::getIgnore(); |
| |
| if (getContext().getLangOpts().OpenMP && |
| getContext().getLangOpts().OpenMPIsTargetDevice && |
| isUnsupportedType(RetTy)) |
| return coerceToIntArrayWithLimit(RetTy, 64); |
| |
  // Note: this differs from the default ABI; non-scalar (aggregate) return
  // types are returned directly rather than indirectly.
| if (!RetTy->isScalarType()) |
| return ABIArgInfo::getDirect(); |
| |
| // Treat an enum type as its underlying type. |
| if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) |
| RetTy = EnumTy->getDecl()->getIntegerType(); |
| |
| return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) |
| : ABIArgInfo::getDirect()); |
| } |
| |
| ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { |
| // Treat an enum type as its underlying type. |
| if (const EnumType *EnumTy = Ty->getAs<EnumType>()) |
| Ty = EnumTy->getDecl()->getIntegerType(); |
| |
  // Pass aggregate types indirectly via a byval pointer.
| if (isAggregateTypeForABI(Ty)) { |
| // Under CUDA device compilation, tex/surf builtin types are replaced with |
| // object types and passed directly. |
| if (getContext().getLangOpts().CUDAIsDevice) { |
| if (Ty->isCUDADeviceBuiltinSurfaceType()) |
| return ABIArgInfo::getDirect( |
| CGInfo.getCUDADeviceBuiltinSurfaceDeviceType()); |
| if (Ty->isCUDADeviceBuiltinTextureType()) |
| return ABIArgInfo::getDirect( |
| CGInfo.getCUDADeviceBuiltinTextureDeviceType()); |
| } |
| return getNaturalAlignIndirect(Ty, /* byval */ true); |
| } |
| |
| if (const auto *EIT = Ty->getAs<BitIntType>()) { |
| if ((EIT->getNumBits() > 128) || |
| (!getContext().getTargetInfo().hasInt128Type() && |
| EIT->getNumBits() > 64)) |
| return getNaturalAlignIndirect(Ty, /* byval */ true); |
| } |
| |
| return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) |
| : ABIArgInfo::getDirect()); |
| } |
| |
| void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { |
| if (!getCXXABI().classifyReturnType(FI)) |
| FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); |
| for (auto &I : FI.arguments()) |
| I.info = classifyArgumentType(I.type); |
| |
| // Always honor user-specified calling convention. |
| if (FI.getCallingConvention() != llvm::CallingConv::C) |
| return; |
| |
| FI.setEffectiveCallingConvention(getRuntimeCC()); |
| } |
| |
| Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, |
| QualType Ty) const { |
| llvm_unreachable("NVPTX does not support varargs"); |
| } |
| |
| void NVPTXTargetCodeGenInfo::setTargetAttributes( |
| const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { |
| if (GV->isDeclaration()) |
| return; |
| const VarDecl *VD = dyn_cast_or_null<VarDecl>(D); |
| if (VD) { |
| if (M.getLangOpts().CUDA) { |
| if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) |
| addNVVMMetadata(GV, "surface", 1); |
| else if (VD->getType()->isCUDADeviceBuiltinTextureType()) |
| addNVVMMetadata(GV, "texture", 1); |
| return; |
| } |
| } |
| |
| const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); |
  if (!FD)
    return;
| |
| llvm::Function *F = cast<llvm::Function>(GV); |
| |
| // Perform special handling in OpenCL mode |
| if (M.getLangOpts().OpenCL) { |
| // Use OpenCL function attributes to check for kernel functions |
| // By default, all functions are device functions |
| if (FD->hasAttr<OpenCLKernelAttr>()) { |
| // OpenCL __kernel functions get kernel metadata |
| // Create !{<func-ref>, metadata !"kernel", i32 1} node |
| addNVVMMetadata(F, "kernel", 1); |
| // And kernel functions are not subject to inlining |
| F->addFnAttr(llvm::Attribute::NoInline); |
| } |
| } |
| |
| // Perform special handling in CUDA mode. |
| if (M.getLangOpts().CUDA) { |
| // CUDA __global__ functions get a kernel metadata entry. Since |
| // __global__ functions cannot be called from the device, we do not |
| // need to set the noinline attribute. |
| if (FD->hasAttr<CUDAGlobalAttr>()) { |
| // Create !{<func-ref>, metadata !"kernel", i32 1} node |
| addNVVMMetadata(F, "kernel", 1); |
| } |
| if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) { |
| // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node |
| llvm::APSInt MaxThreads(32); |
| MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); |
| if (MaxThreads > 0) |
| addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); |
| |
      // MinBlocks is an optional argument for CUDALaunchBoundsAttr. If it was
      // not specified in __launch_bounds__ or if the user specified a 0 value,
      // we don't have to add a PTX directive.
| if (Attr->getMinBlocks()) { |
| llvm::APSInt MinBlocks(32); |
| MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); |
| if (MinBlocks > 0) |
| // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node |
| addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); |
| } |
| } |
| } |
| |
| // Attach kernel metadata directly if compiling for NVPTX. |
| if (FD->hasAttr<NVPTXKernelAttr>()) { |
| addNVVMMetadata(F, "kernel", 1); |
| } |
| } |
| |
| void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, |
| StringRef Name, int Operand) { |
| llvm::Module *M = GV->getParent(); |
| llvm::LLVMContext &Ctx = M->getContext(); |
| |
| // Get "nvvm.annotations" metadata node |
| llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); |
| |
| llvm::Metadata *MDVals[] = { |
| llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), |
| llvm::ConstantAsMetadata::get( |
| llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; |
| // Append metadata to nvvm.annotations |
| MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); |
| } |
| |
| bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { |
| return false; |
| } |
| } |
| |
| std::unique_ptr<TargetCodeGenInfo> |
| CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) { |
| return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes()); |
| } |